From 80b331c7f6c3724f2044325e0d7d7c79ae5a4510 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 23 Jul 2010 18:47:21 -0700 Subject: util: Add PIPE_OS_CYGWIN to u_network. --- src/gallium/auxiliary/util/u_network.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 87ee0e4768..77f2c5fc7d 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include # include -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_CYGWIN) # include # include # include -- cgit v1.2.3 From d26fb6916931f10e029429ecbf46e86484e7e956 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 26 Jul 2010 14:53:06 +0200 Subject: util: fix mutex leaks in mempool --- src/gallium/auxiliary/util/u_mempool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c index 84e2a34acc..6b1a72a7f6 100644 --- a/src/gallium/auxiliary/util/u_mempool.c +++ b/src/gallium/auxiliary/util/u_mempool.c @@ -126,7 +126,6 @@ void util_mempool_set_thread_safety(struct util_mempool *pool, pool->threading = threading; if (threading) { - pipe_mutex_init(pool->mutex); pool->malloc = util_mempool_malloc_mt; pool->free = util_mempool_free_mt; } else { @@ -152,6 +151,8 @@ void util_mempool_create(struct util_mempool *pool, make_empty_list(&pool->list); + pipe_mutex_init(pool->mutex); + util_mempool_set_thread_safety(pool, threading); } -- cgit v1.2.3 From a3a42e46965221b8f8249f0f1076fc3544b68d0e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 26 Jul 2010 14:56:48 +0200 Subject: util: fix another mutex leak in mempool By fixing one, I introduced another. Crap. --- src/gallium/auxiliary/util/u_mempool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c index 6b1a72a7f6..1f336b39a1 100644 --- a/src/gallium/auxiliary/util/u_mempool.c +++ b/src/gallium/auxiliary/util/u_mempool.c @@ -165,6 +165,5 @@ void util_mempool_destroy(struct util_mempool *pool) FREE(page); } - if (pool->threading) - pipe_mutex_destroy(pool->mutex); + pipe_mutex_destroy(pool->mutex); } -- cgit v1.2.3 From 3cef6c42bc0966ee988c0e67935053e8ed93ab5e Mon Sep 17 00:00:00 2001 From: nobled Date: Sat, 24 Jul 2010 12:59:40 +0000 Subject: util: fix CPU detection on OS X s/PIPE_OS_DARWIN/PIPE_OS_APPLE, since there is no PIPE_OS_DARWIN. Acked-by: Vinson Lee --- src/gallium/auxiliary/util/u_cpu_detect.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index a08241971c..23d33af4e4 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -38,7 +38,7 @@ #include "u_cpu_detect.h" #if defined(PIPE_ARCH_PPC) -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) #include #else #include @@ -132,7 +132,7 @@ win32_sig_handler_sse(EXCEPTION_POINTERS* ep) #endif /* PIPE_ARCH_X86 */ -#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN) +#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE) static jmp_buf __lv_powerpc_jmpbuf; static volatile sig_atomic_t __lv_powerpc_canjump = 0; @@ -153,7 +153,7 @@ sigill_handler(int sig) static void check_os_altivec_support(void) { -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) int sels[2] = {CTL_HW, HW_VECTORUNIT}; int has_vu = 0; int len = sizeof (has_vu); @@ -166,8 +166,8 @@ check_os_altivec_support(void) util_cpu_caps.has_altivec = 1; } } -#else /* !PIPE_OS_DARWIN */ - /* no Darwin, do it the brute-force way */ +#else /* !PIPE_OS_APPLE */ + /* not on Apple/Darwin, do it the brute-force way */ /* this is borrowed from the libmpeg2 library */ signal(SIGILL, sigill_handler); if (setjmp(__lv_powerpc_jmpbuf)) { @@ -184,7 +184,7 @@ check_os_altivec_support(void) signal(SIGILL, SIG_DFL); util_cpu_caps.has_altivec = 1; } -#endif /* PIPE_OS_DARWIN */ +#endif /* !PIPE_OS_APPLE */ } #endif /* PIPE_ARCH_PPC */ -- cgit v1.2.3 From b17ee335e3398cd1bbd26f5411e7ee6fb6839286 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:48:29 -0600 Subject: util: fix unused function warning on non-x86 --- src/gallium/auxiliary/util/u_cpu_detect.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 23d33af4e4..879643463f 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -75,7 +75,10 @@ struct util_cpu_caps util_cpu_caps; +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) static int has_cpuid(void); +#endif + #if defined(PIPE_ARCH_X86) -- cgit v1.2.3 From 2a45972fb2ba12a6561e5cba84d167f4c30566d4 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 27 Jul 2010 13:08:01 +0100 Subject: gallivm: Add lp_build_select_bitwise() alternative to lp_build_select_bitwise When (mask & a) | (~mask & b) is meant instead of mask ? a : b --- src/gallium/auxiliary/gallivm/lp_bld_logic.c | 67 +++++++++++++++++++--------- src/gallium/auxiliary/gallivm/lp_bld_logic.h | 5 +++ 2 files changed, 50 insertions(+), 22 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 39854e43b1..ab4ddb81c4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -362,10 +362,53 @@ lp_build_cmp(struct lp_build_context *bld, } +/** + * Return (mask & a) | (~mask & b); + */ +LLVMValueRef +lp_build_select_bitwise(struct lp_build_context *bld, + LLVMValueRef mask, + LLVMValueRef a, + LLVMValueRef b) +{ + struct lp_type type = bld->type; + LLVMValueRef res; + + if (a == b) { + return a; + } + + if(type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + } + + a = LLVMBuildAnd(bld->builder, a, mask, ""); + + /* This often gets translated to PANDN, but sometimes the NOT is + * pre-computed and stored in another constant. The best strategy depends + * on available registers, so it is not a big deal -- hopefully LLVM does + * the right decision attending the rest of the program. + */ + b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); + + res = LLVMBuildOr(bld->builder, a, b, ""); + + if(type.floating) { + LLVMTypeRef vec_type = lp_build_vec_type(type); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + + return res; +} + + /** * Return mask ? a : b; * - * mask is a bitwise mask, composed of 0 or ~0 for each element. + * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value + * will yield unpredictable results. */ LLVMValueRef lp_build_select(struct lp_build_context *bld, @@ -424,27 +467,7 @@ lp_build_select(struct lp_build_context *bld, } } else { - if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); - } - - a = LLVMBuildAnd(bld->builder, a, mask, ""); - - /* This often gets translated to PANDN, but sometimes the NOT is - * pre-computed and stored in another constant. The best strategy depends - * on available registers, so it is not a big deal -- hopefully LLVM does - * the right decision attending the rest of the program. - */ - b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); - - res = LLVMBuildOr(bld->builder, a, b, ""); - - if(type.floating) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); - } + res = lp_build_select_bitwise(bld, mask, a, b); } return res; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 29f9fc3b20..4e7b4c9938 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -63,6 +63,11 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef +lp_build_select_bitwise(struct lp_build_context *bld, + LLVMValueRef mask, + LLVMValueRef a, + LLVMValueRef b); LLVMValueRef lp_build_select(struct lp_build_context *bld, -- cgit v1.2.3 From da30209afdd77199c98694ef64c6eaea557d0918 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 28 Jul 2010 01:07:03 -0700 Subject: draw: Fix VMware spelling. --- src/gallium/auxiliary/draw/draw_gs.c | 2 +- src/gallium/auxiliary/draw/draw_gs.h | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 79a57a67f3..0c590f936b 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMWare Inc. + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 2cb634818c..06f4b822a2 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMWare Inc. + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index bc074df8c2..5c9db12086 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2010 VMWare, Inc. + * Copyright 2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a -- cgit v1.2.3 From 6d28bf917fb1d741d90fd3f05c22769376021fca Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 16 Jul 2010 04:35:58 +0800 Subject: gallium: Implement draw_vbo and set_index_buffer for all drivers. Some drivers define a generic function that is called by all drawing functions. To implement draw_vbo for such drivers, either draw_vbo calls the generic function or the prototype of the generic function is changed to match draw_vbo. Other drivers have no such generic function. draw_vbo is implemented by calling either draw_arrays and draw_elements. For most drivers, set_index_buffer does not mark the state dirty for tracking. Instead, the index buffer state is emitted whenever draw_vbo is called, just like the case with draw_elements. It surely can be improved. --- src/gallium/auxiliary/util/u_draw_quad.h | 30 +++++ src/gallium/drivers/cell/ppu/cell_context.h | 1 + src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 69 ++++++++--- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 16 +++ src/gallium/drivers/failover/fo_context.c | 61 ++++++---- src/gallium/drivers/failover/fo_context.h | 2 + src/gallium/drivers/failover/fo_state.c | 18 +++ src/gallium/drivers/failover/fo_state_emit.c | 5 + src/gallium/drivers/galahad/glhd_context.c | 47 +++++++ src/gallium/drivers/i915/i915_context.c | 70 ++++++++--- src/gallium/drivers/i915/i915_context.h | 1 + src/gallium/drivers/i915/i915_state.c | 14 +++ src/gallium/drivers/i965/brw_context.h | 1 + src/gallium/drivers/i965/brw_draw.c | 87 ++++++++----- src/gallium/drivers/i965/brw_draw_upload.c | 7 +- src/gallium/drivers/i965/brw_pipe_vertex.c | 31 +++++ src/gallium/drivers/identity/id_context.c | 30 +++++ src/gallium/drivers/llvmpipe/lp_context.h | 1 + src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 90 ++++++++++---- src/gallium/drivers/llvmpipe/lp_state_vertex.c | 14 +++ src/gallium/drivers/nv50/nv50_context.c | 1 + src/gallium/drivers/nv50/nv50_context.h | 3 + src/gallium/drivers/nv50/nv50_state.c | 15 +++ src/gallium/drivers/nv50/nv50_vbo.c | 31 +++++ src/gallium/drivers/nvfx/nvfx_context.c | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 5 +- src/gallium/drivers/nvfx/nvfx_state.c | 15 +++ src/gallium/drivers/nvfx/nvfx_vbo.c | 39 +++++- src/gallium/drivers/r300/r300_context.h | 2 + src/gallium/drivers/r300/r300_render.c | 142 ++++++++++++++-------- src/gallium/drivers/r300/r300_render_stencilref.c | 22 ++++ src/gallium/drivers/r300/r300_state.c | 18 +++ src/gallium/drivers/r600/r600_context.c | 1 + src/gallium/drivers/r600/r600_context.h | 3 + src/gallium/drivers/r600/r600_draw.c | 27 ++++ src/gallium/drivers/r600/r600_state.c | 18 +++ src/gallium/drivers/rbug/rbug_context.c | 34 ++++++ src/gallium/drivers/softpipe/sp_context.c | 2 + src/gallium/drivers/softpipe/sp_context.h | 1 + src/gallium/drivers/softpipe/sp_draw_arrays.c | 92 +++++++++----- src/gallium/drivers/softpipe/sp_state.h | 7 ++ src/gallium/drivers/softpipe/sp_state_vertex.c | 14 +++ src/gallium/drivers/svga/svga_context.h | 1 + src/gallium/drivers/svga/svga_pipe_draw.c | 24 ++++ src/gallium/drivers/svga/svga_pipe_vertex.c | 19 +++ src/gallium/drivers/trace/tr_context.c | 52 ++++++++ src/gallium/drivers/trace/tr_dump_state.c | 20 +++ src/gallium/drivers/trace/tr_dump_state.h | 2 + 48 files changed, 1004 insertions(+), 202 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 42eb184428..1c9f752611 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -29,12 +29,42 @@ #define U_DRAWQUAD_H +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" + + #ifdef __cplusplus extern "C" { #endif struct pipe_resource; + +static INLINE void +util_draw_init_info(struct pipe_draw_info *info) +{ + memset(info, 0, sizeof(*info)); + info->instance_count = 1; + info->max_index = 0xffffffff; +} + + +static INLINE void +util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + + extern void util_draw_vertex_buffer(struct pipe_context *pipe, struct pipe_resource *vbuf, uint offset, diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index dc46e59a2d..d1aee62ba1 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -132,6 +132,7 @@ struct cell_context struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; uint num_vertex_buffers; + struct pipe_index_buffer index_buffer; ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; ubyte *zsbuf_map; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 6a1e4d8a64..e06226fbfe 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_inlines.h" +#include "util/u_draw_quad.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -56,16 +57,11 @@ * XXX should the element buffer be specified/bound with a separate function? */ static void -cell_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) +cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct cell_context *cell = cell_context(pipe); struct draw_context *draw = cell->draw; + void *mapped_indices = NULL; unsigned i; if (cell->dirty) @@ -83,18 +79,20 @@ cell_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = cell_resource(indexBuffer)->data; - draw_set_mapped_element_buffer(draw, indexSize, indexBias, mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + if (info->indexed && cell->index_buffer.buffer) { + mapped_indices = cell_resource(cell->index_buffer.buffer)->data; + mapped_indices += cell->index_buffer.offset; } + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + lp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); /* draw! */ - draw_arrays(draw, mode, start, count); + draw_arrays(draw, info->mode, info->start, info->count); /* * unmap vertex/index buffers - will cause draw module to flush @@ -102,7 +100,7 @@ cell_draw_range_elements(struct pipe_context *pipe, for (i = 0; i < cell->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } @@ -115,6 +113,44 @@ cell_draw_range_elements(struct pipe_context *pipe, } +static void +cell_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct cell_context *cell = cell_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = indexBias; + info.min_index = min_index; + info.max_index = max_index; + + if (indexBuffer) { + info.indexed = TRUE; + saved_ib = cell->index_buffer; + + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + cell_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + + static void cell_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, @@ -142,5 +178,6 @@ cell_init_draw_functions(struct cell_context *cell) cell->pipe.draw_arrays = cell_draw_arrays; cell->pipe.draw_elements = cell_draw_elements; cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.draw_vbo = cell_draw_vbo; } diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 69152b6cbf..4e3701cd0a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -91,10 +91,26 @@ cell_set_vertex_buffers(struct pipe_context *pipe, } +static void +cell_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct cell_context *cell = cell_context(pipe); + + if (ib) + memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer)); + else + memset(&cell->index_buffer, 0, sizeof(cell->index_buffer)); + + /* TODO make this more like a state */ +} + + void cell_init_vertex_functions(struct cell_context *cell) { cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_index_buffer = cell_set_index_buffer; cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 9c9c1bdc45..1048d58313 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -28,6 +28,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" +#include "util/u_draw_quad.h" #include "pipe/p_context.h" #include "fo_context.h" @@ -50,13 +51,8 @@ void failover_fail_over( struct failover_context *failover ) } -static void failover_draw_elements( struct pipe_context *pipe, - struct pipe_resource *indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) +static void failover_draw_vbo( struct pipe_context *pipe, + const struct pipe_draw_info *info) { struct failover_context *failover = failover_context( pipe ); @@ -70,13 +66,7 @@ static void failover_draw_elements( struct pipe_context *pipe, /* Try hardware: */ if (failover->mode == FO_HW) { - failover->hw->draw_elements( failover->hw, - indexResource, - indexSize, - indexBias, - prim, - start, - count ); + failover->hw->draw_vbo( failover->hw, info ); } /* Possibly try software: @@ -88,13 +78,7 @@ static void failover_draw_elements( struct pipe_context *pipe, failover_state_emit( failover ); } - failover->sw->draw_elements( failover->sw, - indexResource, - indexSize, - indexBias, - prim, - start, - count ); + failover->sw->draw_vbo( failover->sw, info ); /* Be ready to switch back to hardware rendering without an * intervening flush. Unlikely to be much performance impact to @@ -105,6 +89,40 @@ static void failover_draw_elements( struct pipe_context *pipe, } +static void failover_draw_elements( struct pipe_context *pipe, + struct pipe_resource *indexResource, + unsigned indexSize, + int indexBias, + unsigned prim, + unsigned start, + unsigned count) +{ + struct failover_context *failover = failover_context( pipe ); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = prim; + info.start = start; + info.count = count; + + if (indexResource) { + info.indexed = TRUE; + saved_ib = failover->index_buffer; + + ib.buffer = indexResource; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + failover_draw_vbo(pipe, &info); + + if (indexResource) + pipe->set_index_buffer(pipe, &saved_ib); +} + + static void failover_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { @@ -145,6 +163,7 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.draw_arrays = failover_draw_arrays; failover->pipe.draw_elements = failover_draw_elements; + failover->pipe.draw_vbo = failover_draw_vbo; failover->pipe.clear = hw->clear; failover->pipe.clear_render_target = hw->clear_render_target; failover->pipe.clear_depth_stencil = hw->clear_depth_stencil; diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 9d3e0d0dba..1afa6c9cee 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -56,6 +56,7 @@ #define FO_NEW_VERTEX_BUFFER 0x40000 #define FO_NEW_VERTEX_ELEMENT 0x80000 #define FO_NEW_SAMPLE_MASK 0x100000 +#define FO_NEW_INDEX_BUFFER 0x200000 @@ -97,6 +98,7 @@ struct failover_context { struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; uint num_vertex_buffers; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 12e42379f9..c265f381b6 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -583,6 +583,23 @@ failover_set_vertex_buffers(struct pipe_context *pipe, } +static void +failover_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct failover_context *failover = failover_context(pipe); + + if (ib) + memcpy(&failover->index_buffer, ib, sizeof(failover->index_buffer)); + else + memset(&failover->index_buffer, 0, sizeof(failover->index_buffer)); + + failover->dirty |= FO_NEW_INDEX_BUFFER; + failover->sw->set_index_buffer( failover->sw, ib ); + failover->hw->set_index_buffer( failover->hw, ib ); +} + + void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, @@ -635,6 +652,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_vertex_sampler_views = failover_set_vertex_sampler_views; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; + failover->pipe.set_index_buffer = failover_set_index_buffer; failover->pipe.set_constant_buffer = failover_set_constant_buffer; failover->pipe.create_sampler_view = failover_create_sampler_view; failover->pipe.sampler_view_destroy = failover_sampler_view_destroy; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index 147f23269c..7f434ff9d6 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -135,5 +135,10 @@ failover_state_emit( struct failover_context *failover ) failover->vertex_buffers ); } + if (failover->dirty & FO_NEW_INDEX_BUFFER) { + failover->sw->set_index_buffer( failover->sw, + &failover->index_buffer ); + } + failover->dirty = 0; } diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index ab6f17b3ab..6473f2d499 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -112,6 +112,16 @@ galahad_draw_range_elements(struct pipe_context *_pipe, count); } +static void +galahad_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->draw_vbo(pipe, info); +} + static struct pipe_query * galahad_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -650,6 +660,41 @@ galahad_set_vertex_buffers(struct pipe_context *_pipe, num_buffers, buffers); } + +static void +galahad_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib->buffer) { + switch (_ib->index_size) { + case 1: + case 2: + case 4: + break; + default: + glhd_warn("index buffer %p has unrecognized index size %d", + _ib->buffer, _ib->index_size); + break; + } + } + else if (_ib->offset || _ib->index_size) { + glhd_warn("non-indexed state with index offset %d and index size %d", + _ib->offset, _ib->index_size); + } + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = galahad_resource_unwrap(_ib->buffer); + ib = &unwrapped_ib; + } + + pipe->set_index_buffer(pipe, ib); +} + static void galahad_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, @@ -937,6 +982,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.draw_arrays = galahad_draw_arrays; glhd_pipe->base.draw_elements = galahad_draw_elements; glhd_pipe->base.draw_range_elements = galahad_draw_range_elements; + glhd_pipe->base.draw_vbo = galahad_draw_vbo; glhd_pipe->base.create_query = galahad_create_query; glhd_pipe->base.destroy_query = galahad_destroy_query; glhd_pipe->base.begin_query = galahad_begin_query; @@ -976,6 +1022,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.set_fragment_sampler_views = galahad_set_fragment_sampler_views; glhd_pipe->base.set_vertex_sampler_views = galahad_set_vertex_sampler_views; glhd_pipe->base.set_vertex_buffers = galahad_set_vertex_buffers; + glhd_pipe->base.set_index_buffer = galahad_set_index_buffer; glhd_pipe->base.resource_copy_region = galahad_resource_copy_region; glhd_pipe->base.clear = galahad_clear; glhd_pipe->base.clear_render_target = galahad_clear_render_target; diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 2af9bdac95..ca07b3e235 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -36,6 +36,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_draw_quad.h" #include "pipe/p_screen.h" @@ -45,16 +46,11 @@ static void -i915_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned prim, unsigned start, unsigned count) +i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct i915_context *i915 = i915_context(pipe); struct draw_context *draw = i915->draw; + void *mapped_indices = NULL; unsigned i; if (i915->dirty) @@ -71,16 +67,18 @@ i915_draw_range_elements(struct pipe_context *pipe, /* * Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = i915_buffer(indexBuffer)->data; - draw_set_mapped_element_buffer_range(draw, indexSize, indexBias, - min_index, - max_index, - mapped_indexes); - } else { - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + if (info->indexed && i915->index_buffer.buffer) { + mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; + mapped_indices += i915->index_buffer.offset; } + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + i915->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, i915->current.constants[PIPE_SHADER_VERTEX], @@ -90,7 +88,7 @@ i915_draw_range_elements(struct pipe_context *pipe, /* * Do the drawing */ - draw_arrays(i915->draw, prim, start, count); + draw_arrays(i915->draw, info->mode, info->start, info->count); /* * unmap vertex/index buffers @@ -99,11 +97,48 @@ i915_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } } +static void +i915_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = prim; + info.start = start; + info.count = count; + info.index_bias = indexBias; + info.min_index = min_index; + info.max_index = max_index; + + if (indexBuffer) { + info.indexed = TRUE; + saved_ib = i915->index_buffer; + + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + i915_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + static void i915_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, @@ -171,6 +206,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->base.draw_arrays = i915_draw_arrays; i915->base.draw_elements = i915_draw_elements; i915->base.draw_range_elements = i915_draw_range_elements; + i915->base.draw_vbo = i915_draw_vbo; /* * Create drawing context and plug our rendering stage into it. diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index b210cb130d..3ae61d0ea7 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -221,6 +221,7 @@ struct i915_context struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; unsigned dirty; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index e767aa9f8f..385c3b2d2d 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -812,6 +812,19 @@ i915_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) FREE( velems ); } +static void i915_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct i915_context *i915 = i915_context(pipe); + + if (ib) + memcpy(&i915->index_buffer, ib, sizeof(i915->index_buffer)); + else + memset(&i915->index_buffer, 0, sizeof(i915->index_buffer)); + + /* TODO make this more like a state */ +} + static void i915_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) @@ -860,4 +873,5 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.sampler_view_destroy = i915_sampler_view_destroy; i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; + i915->base.set_index_buffer = i915_set_index_buffer; } diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 94c9c443f0..56d351f97d 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -576,6 +576,7 @@ struct brw_context */ struct pipe_resource *index_buffer; unsigned index_size; + unsigned index_offset; /* Updates are signalled by PIPE_NEW_INDEX_RANGE: */ diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 4625c2048f..fa7d047e0b 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -29,6 +29,7 @@ #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_upload_mgr.h" +#include "util/u_draw_quad.h" #include "brw_draw.h" #include "brw_defines.h" @@ -142,7 +143,7 @@ static int brw_emit_prim(struct brw_context *brw, */ static int try_draw_range_elements(struct brw_context *brw, - struct pipe_resource *index_buffer, + boolean indexed, unsigned hw_prim, unsigned start, unsigned count) { @@ -165,7 +166,7 @@ try_draw_range_elements(struct brw_context *brw, if (ret) return ret; - ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim); + ret = brw_emit_prim(brw, start, count, indexed, hw_prim); if (ret) return ret; @@ -177,61 +178,86 @@ try_draw_range_elements(struct brw_context *brw, static void -brw_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) +brw_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct brw_context *brw = brw_context(pipe); int ret; uint32_t hw_prim; - hw_prim = brw_set_prim(brw, mode); + hw_prim = brw_set_prim(brw, info->mode); if (BRW_DEBUG & DEBUG_PRIMS) debug_printf("PRIM: %s start %d count %d index_buffer %p\n", - u_prim_name(mode), start, count, (void *)index_buffer); - - assert(index_bias == 0); + u_prim_name(info->mode), info->start, info->count, + (void *) brw->curr.index_buffer); - /* Potentially trigger upload of new index buffer. - * - * XXX: do we need to go through state validation to achieve this? - * Could just call upload code directly. - */ - if (brw->curr.index_buffer != index_buffer || - brw->curr.index_size != index_size) { - pipe_resource_reference( &brw->curr.index_buffer, index_buffer ); - brw->curr.index_size = index_size; - brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; - } + assert(info->index_bias == 0); - /* XXX: do we really care? + /* Potentially trigger upload of new index buffer range. + * XXX: do we really care? */ - if (brw->curr.min_index != min_index || - brw->curr.max_index != max_index) + if (brw->curr.min_index != info->min_index || + brw->curr.max_index != info->max_index) { - brw->curr.min_index = min_index; - brw->curr.max_index = max_index; + brw->curr.min_index = info->min_index; + brw->curr.max_index = info->max_index; brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE; } /* Make a first attempt at drawing: */ - ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + ret = try_draw_range_elements(brw, info->indexed, + hw_prim, info->start, info->count); /* Otherwise, flush and retry: */ if (ret != 0) { brw_context_flush( brw ); - ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + ret = try_draw_range_elements(brw, info->indexed, + hw_prim, info->start, info->count); assert(ret == 0); } } +static void +brw_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *index_buffer, + unsigned index_size, int index_bias, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct brw_context *brw = brw_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + info.min_index = min_index; + info.max_index = max_index; + + if (index_buffer) { + info.indexed = TRUE; + saved_ib.buffer = brw->curr.index_buffer; + saved_ib.offset = brw->curr.index_offset; + saved_ib.index_size = brw->curr.index_size; + + ib.buffer = index_buffer; + ib.offset = 0; + ib.index_size = index_size; + pipe->set_index_buffer(pipe, &ib); + } + + brw_draw_vbo(pipe, &info); + + if (index_buffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + static void brw_draw_elements(struct pipe_context *pipe, struct pipe_resource *index_buffer, @@ -262,6 +288,7 @@ boolean brw_draw_init( struct brw_context *brw ) brw->base.draw_arrays = brw_draw_arrays; brw->base.draw_elements = brw_draw_elements; brw->base.draw_range_elements = brw_draw_range_elements; + brw->base.draw_vbo = brw_draw_vbo; /* Create helpers for uploading data in user buffers: */ diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 337eee8cd9..ebeb1e146a 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -231,7 +231,7 @@ static int brw_prepare_indices(struct brw_context *brw) struct pipe_resource *upload_buf = NULL; struct brw_winsys_buffer *bo = NULL; GLuint offset; - GLuint index_size; + GLuint index_size, index_offset; GLuint ib_size; int ret; @@ -246,13 +246,14 @@ static int brw_prepare_indices(struct brw_context *brw) ib_size = index_buffer->width0; index_size = brw->curr.index_size; + index_offset = brw->curr.index_offset; /* Turn userbuffer into a proper hardware buffer? */ if (brw_buffer_is_user_buffer(index_buffer)) { ret = u_upload_buffer( brw->vb.upload_index, - 0, + index_offset, ib_size, index_buffer, &offset, @@ -269,7 +270,7 @@ static int brw_prepare_indices(struct brw_context *brw) else { bo = brw_buffer(index_buffer)->bo; ib_size = bo->size; - offset = 0; + offset = index_offset; } /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index 4a120a51da..007239efc4 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -274,10 +274,41 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe, } +static void brw_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct brw_context *brw = brw_context(pipe); + + if (ib) { + if (brw->curr.index_buffer == ib->buffer && + brw->curr.index_offset == ib->offset && + brw->curr.index_size == ib->index_size) + return; + + pipe_resource_reference(&brw->curr.index_buffer, ib->buffer); + brw->curr.index_offset = ib->offset; + brw->curr.index_size = ib->index_size; + } + else { + if (!brw->curr.index_buffer && + !brw->curr.index_offset && + !brw->curr.index_size) + return; + + pipe_resource_reference(&brw->curr.index_buffer, NULL); + brw->curr.index_offset = 0; + brw->curr.index_size = 0; + } + + brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; +} + + void brw_pipe_vertex_init( struct brw_context *brw ) { brw->base.set_vertex_buffers = brw_set_vertex_buffers; + brw->base.set_index_buffer = brw_set_index_buffer; brw->base.create_vertex_elements_state = brw_create_vertex_elements_state; brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state; brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state; diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 67be895b38..e10d3a1413 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -110,6 +110,16 @@ identity_draw_range_elements(struct pipe_context *_pipe, count); } +static void +identity_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + + pipe->draw_vbo(pipe, info); +} + static struct pipe_query * identity_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -611,6 +621,24 @@ identity_set_vertex_buffers(struct pipe_context *_pipe, num_buffers, buffers); } + +static void +identity_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = identity_resource_unwrap(_ib->buffer); + ib = &unwrapped_ib; + } + + pipe->set_index_buffer(pipe, ib); +} + static void identity_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, @@ -892,6 +920,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.draw_arrays = identity_draw_arrays; id_pipe->base.draw_elements = identity_draw_elements; id_pipe->base.draw_range_elements = identity_draw_range_elements; + id_pipe->base.draw_vbo = identity_draw_vbo; id_pipe->base.create_query = identity_create_query; id_pipe->base.destroy_query = identity_destroy_query; id_pipe->base.begin_query = identity_begin_query; @@ -931,6 +960,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.set_fragment_sampler_views = identity_set_fragment_sampler_views; id_pipe->base.set_vertex_sampler_views = identity_set_vertex_sampler_views; id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers; + id_pipe->base.set_index_buffer = identity_set_index_buffer; id_pipe->base.resource_copy_region = identity_resource_copy_region; id_pipe->base.clear = identity_clear; id_pipe->base.clear_render_target = identity_clear_render_target; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index b2643ab33c..50f9091c3c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -77,6 +77,7 @@ struct llvmpipe_context { struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; struct { struct llvmpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; int offset[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 625d0c8a8c..b6dbb9d288 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "lp_context.h" #include "lp_state.h" @@ -49,20 +50,11 @@ * the drawing to the 'draw' module. */ static void -llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) +llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct llvmpipe_context *lp = llvmpipe_context(pipe); struct draw_context *draw = lp->draw; + void *mapped_indices = NULL; unsigned i; if (lp->dirty) @@ -77,27 +69,25 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = llvmpipe_resource_data(indexBuffer); - draw_set_mapped_element_buffer_range(draw, - indexSize, - indexBias, - minIndex, - maxIndex, - mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, 0, start, - start + count - 1, NULL); + if (info->indexed && lp->index_buffer.buffer) { + mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer); + mapped_indices += lp->index_buffer.offset; } + + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + lp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + llvmpipe_prepare_vertex_sampling(lp, lp->num_vertex_sampler_views, lp->vertex_sampler_views); /* draw! */ - draw_arrays_instanced(draw, mode, start, count, - startInstance, instanceCount); + draw_arrays_instanced(draw, info->mode, info->start, info->count, + info->start_instance, info->instance_count); /* * unmap vertex/index buffers @@ -105,7 +95,7 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } llvmpipe_cleanup_vertex_sampling(lp); @@ -119,6 +109,50 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, } +static void +llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + struct llvmpipe_context *lp = llvmpipe_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = startInstance; + info.instance_count = instanceCount; + + info.index_bias = indexBias; + info.min_index = minIndex; + info.max_index = maxIndex; + + if (indexBuffer) { + info.indexed = TRUE; + saved_ib = lp->index_buffer; + + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + llvmpipe_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + static void llvmpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, @@ -227,4 +261,6 @@ llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; llvmpipe->pipe.draw_arrays_instanced = llvmpipe_draw_arrays_instanced; llvmpipe->pipe.draw_elements_instanced = llvmpipe_draw_elements_instanced; + + llvmpipe->pipe.draw_vbo = llvmpipe_draw_vbo; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 113f13db01..d86e66b4fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -89,6 +89,19 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, } +static void +llvmpipe_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if (ib) + memcpy(&llvmpipe->index_buffer, ib, sizeof(llvmpipe->index_buffer)); + else + memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer)); + + /* TODO make this more like a state */ +} void llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) @@ -98,4 +111,5 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; + llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 915a925402..3fc39c1137 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -86,6 +86,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv) nv50->pipe.draw_arrays_instanced = nv50_draw_arrays_instanced; nv50->pipe.draw_elements = nv50_draw_elements; nv50->pipe.draw_elements_instanced = nv50_draw_elements_instanced; + nv50->pipe.draw_vbo = nv50_draw_vbo; nv50->pipe.clear = nv50_clear; nv50->pipe.flush = nv50_flush; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 12c4a93a9b..a7c2b5d487 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -148,6 +148,7 @@ struct nv50_context { struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; + struct pipe_index_buffer idxbuf; struct nv50_vtxelt_stateobj *vtxelt; struct nv50_sampler_stateobj *sampler[3][PIPE_MAX_SAMPLERS]; unsigned sampler_nr[3]; @@ -197,6 +198,8 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe, unsigned count, unsigned startInstance, unsigned instanceCount); +extern void nv50_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso); extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 42c5a58318..ec0c0ff283 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -742,6 +742,20 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, nv50->dirty |= NV50_NEW_ARRAYS; } +static void +nv50_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (ib) + memcpy(&nv50->idxbuf, ib, sizeof(nv50->idxbuf)); + else + memset(&nv50->idxbuf, 0, sizeof(nv50->idxbuf)); + + /* TODO make this more like a state */ +} + static void * nv50_vtxelts_state_create(struct pipe_context *pipe, unsigned num_elements, @@ -827,5 +841,6 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind; nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; + nv50->pipe.set_index_buffer = nv50_set_index_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 864cb09352..11ffc182c2 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -473,6 +473,37 @@ nv50_draw_elements(struct pipe_context *pipe, mode, start, count, 0, 1); } +void +nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (info->indexed && nv50->idxbuf.buffer) { + unsigned offset; + + assert(nv50->idxbuf.offset % nv50->idxbuf.index_size == 0); + offset = nv50->idxbuf.offset / nv50->idxbuf.index_size; + + nv50_draw_elements_instanced(pipe, + nv50->idxbuf.buffer, + nv50->idxbuf.index_size, + info->index_bias, + info->mode, + info->start + offset, + info->count, + info->start_instance, + info->instance_count); + } + else { + nv50_draw_arrays_instanced(pipe, + info->mode, + info->start, + info->count, + info->start_instance, + info->instance_count); + } +} + static INLINE boolean nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, struct nouveau_stateobj **pso, diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 6d2dc4d5bf..f30795f69a 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -57,6 +57,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.destroy = nvfx_destroy; nvfx->pipe.draw_arrays = nvfx_draw_arrays; nvfx->pipe.draw_elements = nvfx_draw_elements; + nvfx->pipe.draw_vbo = nvfx_draw_vbo; nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nvfx_flush; diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index e48f9f3aa8..d6cd272eed 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -121,7 +121,8 @@ struct nvfx_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; - struct pipe_resource *idxbuf; + struct pipe_index_buffer idxbuf; + struct pipe_resource *idxbuf_buffer; unsigned idxbuf_format; struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; @@ -242,6 +243,8 @@ extern void nvfx_draw_elements(struct pipe_context *pipe, unsigned indexSize, int indexBias, unsigned mode, unsigned start, unsigned count); +extern void nvfx_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); /* nvfx_vertprog.c */ extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 30322d46d9..cd58e439d7 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -555,6 +555,20 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, nvfx->draw_dirty |= NVFX_NEW_ARRAYS; } +static void +nvfx_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + if (ib) + memcpy(&nvfx->idxbuf, ib, sizeof(nvfx->idxbuf)); + else + memset(&nvfx->idxbuf, 0, sizeof(nvfx->idxbuf)); + + /* TODO make this more like a state */ +} + static void * nvfx_vtxelts_state_create(struct pipe_context *pipe, unsigned num_elements, @@ -635,4 +649,5 @@ nvfx_init_state_functions(struct nvfx_context *nvfx) nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; + nvfx->pipe.set_index_buffer = nvfx_set_index_buffer; } diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 520bae5aed..23a59b589b 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -85,7 +85,7 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, unsigned type; if (!ib) { - nvfx->idxbuf = NULL; + nvfx->idxbuf_buffer = NULL; nvfx->idxbuf_format = 0xdeadbeef; return FALSE; } @@ -104,10 +104,10 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, return FALSE; } - if (ib != nvfx->idxbuf || + if (ib != nvfx->idxbuf_buffer || type != nvfx->idxbuf_format) { nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->idxbuf = ib; + nvfx->idxbuf_buffer = ib; nvfx->idxbuf_format = type; } @@ -491,11 +491,38 @@ nvfx_draw_elements(struct pipe_context *pipe, pipe->flush(pipe, 0, NULL); } +void +nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + if (info->indexed && nvfx->idxbuf.buffer) { + unsigned offset; + + assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0); + offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size; + + nvfx_draw_elements(pipe, + nvfx->idxbuf.buffer, + nvfx->idxbuf.index_size, + info->index_bias, + info->mode, + info->start + offset, + info->count); + } + else { + nvfx_draw_arrays(pipe, + info->mode, + info->start, + info->count); + } +} + boolean nvfx_vbo_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; - struct pipe_resource *ib = nvfx->idxbuf; + struct pipe_resource *ib = nvfx->idxbuf_buffer; unsigned ib_format = nvfx->idxbuf_format; int i; int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); @@ -610,10 +637,10 @@ nvfx_vbo_relocate(struct nvfx_context *nvfx) } } - if(nvfx->idxbuf) + if(nvfx->idxbuf_buffer) { unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo; + struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo; assert(nvfx->screen->index_buffer_reloc_flags); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b4256c6278..7c77a46016 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -524,6 +524,8 @@ struct r300_context { struct r300_vertex_element_state *velems; bool any_user_vbs; + struct pipe_index_buffer index_buffer; + /* Vertex info for Draw. */ struct vertex_info vertex_info; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index bae02135da..da96098cc4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -33,6 +33,7 @@ #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "r300_cs.h" #include "r300_context.h" @@ -638,26 +639,56 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } } +static void r300_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) +{ + struct r300_context* r300 = r300_context(pipe); + + if (info->indexed && r300->index_buffer.buffer) { + unsigned offset; + + assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); + offset = r300->index_buffer.offset / r300->index_buffer.index_size; + + r300_draw_range_elements(pipe, + r300->index_buffer.buffer, + r300->index_buffer.index_size, + info->index_bias, + info->min_index, + info->max_index, + info->mode, + info->start + offset, + info->count); + } + else { + r300_draw_arrays(pipe, + info->mode, + info->start, + info->count); + } +} + /**************************************************************************** * The rest of this file is for SW TCL rendering only. Please be polite and * * keep these functions separated so that they are easier to locate. ~C. * ***************************************************************************/ -/* SW TCL arrays, using Draw. */ -static void r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count) +/* SW TCL elements, using Draw. */ +static void r300_swtcl_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) { struct r300_context* r300 = r300_context(pipe); struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; + struct pipe_transfer *ib_transfer; + unsigned count = info->count; int i; + void* indices = NULL; if (r300->skip_rendering) { return; } - if (!u_trim_pipe_prim(mode, &count)) { + if (!u_trim_pipe_prim(info->mode, &count)) { return; } @@ -667,13 +698,25 @@ static void r300_swtcl_draw_arrays(struct pipe_context* pipe, void* buf = pipe_buffer_map(pipe, r300->vertex_buffer[i].buffer, PIPE_TRANSFER_READ, - &vb_transfer[i]); + &vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - draw_set_mapped_element_buffer(r300->draw, 0, 0, NULL); + if (info->indexed && r300->index_buffer.buffer) { + indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, + PIPE_TRANSFER_READ, &ib_transfer); + if (indices) + indices += r300->index_buffer.offset; + } + + draw_set_mapped_element_buffer_range(r300->draw, (indices) ? + r300->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + indices); - draw_arrays(r300->draw, mode, start, count); + draw_arrays(r300->draw, info->mode, info->start, count); /* XXX Not sure whether this is the best fix. * It prevents CS from being rejected and weird assertion failures. */ @@ -681,9 +724,15 @@ static void r300_swtcl_draw_arrays(struct pipe_context* pipe, for (i = 0; i < r300->vertex_buffer_count; i++) { pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); + vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } + + if (ib_transfer) { + pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer); + draw_set_mapped_element_buffer_range(r300->draw, 0, 0, info->start, + info->start + count - 1, NULL); + } } /* SW TCL elements, using Draw. */ @@ -698,51 +747,40 @@ static void r300_swtcl_draw_range_elements(struct pipe_context* pipe, unsigned count) { struct r300_context* r300 = r300_context(pipe); - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - struct pipe_transfer *ib_transfer; - int i; - void* indices; - - if (r300->skip_rendering) { - return; - } - - if (!u_trim_pipe_prim(mode, &count)) { - return; + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = indexBias; + info.min_index = minIndex; + info.max_index = maxIndex; + + if (indexBuffer) { + info.indexed = TRUE; + + saved_ib = r300->index_buffer; + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); } - r300_update_derived_state(r300); - - for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); - } - - indices = pipe_buffer_map(pipe, indexBuffer, - PIPE_TRANSFER_READ, &ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, indexSize, indexBias, - minIndex, maxIndex, indices); - - draw_arrays(r300->draw, mode, start, count); - - /* XXX Not sure whether this is the best fix. - * It prevents CS from being rejected and weird assertion failures. */ - draw_flush(r300->draw); + r300_swtcl_draw_vbo(pipe, &info); - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); - } + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} - pipe_buffer_unmap(pipe, indexBuffer, - ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, 0, 0, - start, start + count - 1, - NULL); +static void r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count) +{ + r300_swtcl_draw_range_elements(pipe, NULL, 0, 0, + start, start + count -1, mode, start, count); } /* Object for rendering using Draw. */ @@ -1148,9 +1186,11 @@ void r300_init_render_functions(struct r300_context *r300) if (r300->screen->caps.has_tcl) { r300->context.draw_arrays = r300_draw_arrays; r300->context.draw_range_elements = r300_draw_range_elements; + r300->context.draw_vbo = r300_draw_vbo; } else { r300->context.draw_arrays = r300_swtcl_draw_arrays; r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + r300->context.draw_vbo = r300_swtcl_draw_vbo; } r300->context.resource_resolve = r300_resource_resolve; diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index 9a6b4e12ff..6d801cf159 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -42,6 +42,9 @@ struct r300_stencilref_context { unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, unsigned mode, unsigned start, unsigned count); + void (*draw_vbo)(struct pipe_context *pipe, + const struct pipe_draw_info *info); + uint32_t rs_cull_mode; uint32_t zb_stencilrefmask; ubyte ref_value_front; @@ -144,6 +147,23 @@ static void r300_stencilref_draw_range_elements( } } +static void r300_stencilref_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_stencilref_context *sr = r300->stencilref_fallback; + + if (!r300_stencilref_needed(r300)) { + sr->draw_vbo(pipe, info); + } else { + r300_stencilref_begin(r300); + sr->draw_vbo(pipe, info); + r300_stencilref_switch_side(r300); + sr->draw_vbo(pipe, info); + r300_stencilref_end(r300); + } +} + void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) { r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); @@ -151,8 +171,10 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) /* Save original draw functions. */ r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo; /* Override the draw functions. */ r300->context.draw_arrays = r300_stencilref_draw_arrays; r300->context.draw_range_elements = r300_stencilref_draw_range_elements; + r300->context.draw_vbo = r300_stencilref_draw_vbo; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3e221f2e02..bccd7d7859 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1505,6 +1505,23 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, r300->vertex_buffer_count = count; } +static void r300_set_index_buffer(struct pipe_context* pipe, + const struct pipe_index_buffer *ib) +{ + struct r300_context* r300 = r300_context(pipe); + + if (ib) { + pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); + memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); + } + else { + pipe_resource_reference(&r300->index_buffer.buffer, NULL); + memset(&r300->index_buffer, 0, sizeof(r300->index_buffer)); + } + + /* TODO make this more like a state */ +} + /* Initialize the PSC tables. */ static void r300_vertex_psc(struct r300_vertex_element_state *velems) { @@ -1852,6 +1869,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_viewport_state = r300_set_viewport_state; r300->context.set_vertex_buffers = r300_set_vertex_buffers; + r300->context.set_index_buffer = r300_set_index_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 4c7b67ea52..2c2bd4672b 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -316,6 +316,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) rctx->context.draw_arrays = r600_draw_arrays; rctx->context.draw_elements = r600_draw_elements; rctx->context.draw_range_elements = r600_draw_range_elements; + rctx->context.draw_vbo = r600_draw_vbo; rctx->context.flush = r600_flush; /* Easy accessing of screen/winsys. */ diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 1f03b202ee..9427c19d05 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -157,6 +157,7 @@ struct r600_context { struct r600_context_state *vs_sampler_view[PIPE_MAX_ATTRIBS]; struct r600_vertex_element *vertex_elements; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; }; #if 0 @@ -201,6 +202,8 @@ void r600_draw_range_elements(struct pipe_context *ctx, unsigned index_size, int index_bias, unsigned min_index, unsigned max_index, unsigned mode, unsigned start, unsigned count); +void r600_draw_vbo(struct pipe_context *ctx, + const struct pipe_draw_info *info); void r600_init_blit_functions(struct r600_context *rctx); void r600_init_state_functions(struct r600_context *rctx); diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index b248beaf8c..eeaa677edb 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -225,3 +225,30 @@ void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, draw.index_buffer = NULL; r600_draw_common(&draw); } + +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_draw draw; + + assert(info->index_bias == 0); + + draw.ctx = ctx; + draw.mode = info->mode; + draw.start = info->start; + draw.count = info->count; + if (info->indexed && rctx->index_buffer.buffer) { + draw.index_size = rctx->index_buffer.index_size; + draw.index_buffer = rctx->index_buffer.buffer; + + assert(rctx->index_buffer.offset % + rctx->index_buffer.index_size == 0); + draw.start += rctx->index_buffer.offset / + rctx->index_buffer.index_size; + } + else { + draw.index_size = 0; + draw.index_buffer = NULL; + } + r600_draw_common(&draw); +} diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index ff5df855c6..57879e8d8b 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -404,6 +404,23 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, rctx->nvertex_buffer = count; } +static void r600_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) +{ + struct r600_context *rctx = r600_context(ctx); + + if (ib) { + pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); + memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); + } + else { + pipe_resource_reference(&rctx->index_buffer.buffer, NULL); + memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); + } + + /* TODO make this more like a state */ +} + static void r600_set_viewport_state(struct pipe_context *ctx, const struct pipe_viewport_state *state) { @@ -449,6 +466,7 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.set_scissor_state = r600_set_scissor_state; rctx->context.set_stencil_ref = r600_set_stencil_ref; rctx->context.set_vertex_buffers = r600_set_vertex_buffers; + rctx->context.set_index_buffer = r600_set_index_buffer; rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; rctx->context.set_viewport_state = r600_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index e0dd5cf8c2..c748073b2a 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -185,6 +185,21 @@ rbug_draw_range_elements(struct pipe_context *_pipe, pipe_mutex_unlock(rb_pipe->draw_mutex); } +static void +rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct pipe_context *pipe = rb_pipe->pipe; + + pipe_mutex_lock(rb_pipe->draw_mutex); + rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); + + pipe->draw_vbo(pipe, info); + + rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); + pipe_mutex_unlock(rb_pipe->draw_mutex); +} + static struct pipe_query * rbug_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -744,6 +759,23 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe, buffers); } +static void +rbug_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct pipe_context *pipe = rb_pipe->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = rbug_resource_unwrap(_ib->buffer); + ib = &unwrapped_ib; + } + + pipe->set_index_buffer(pipe, ib); +} + static void rbug_set_sample_mask(struct pipe_context *_pipe, unsigned sample_mask) @@ -1043,6 +1075,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.draw_arrays = rbug_draw_arrays; rb_pipe->base.draw_elements = rbug_draw_elements; rb_pipe->base.draw_range_elements = rbug_draw_range_elements; + rb_pipe->base.draw_vbo = rbug_draw_vbo; rb_pipe->base.create_query = rbug_create_query; rb_pipe->base.destroy_query = rbug_destroy_query; rb_pipe->base.begin_query = rbug_begin_query; @@ -1084,6 +1117,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.set_fragment_sampler_views = rbug_set_fragment_sampler_views; rb_pipe->base.set_vertex_sampler_views = rbug_set_vertex_sampler_views; rb_pipe->base.set_vertex_buffers = rbug_set_vertex_buffers; + rb_pipe->base.set_index_buffer = rbug_set_index_buffer; rb_pipe->base.set_sample_mask = rbug_set_sample_mask; rb_pipe->base.resource_copy_region = rbug_resource_copy_region; rb_pipe->base.clear = rbug_clear; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 12ef98aac7..fa1fae6f00 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -282,12 +282,14 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; softpipe->pipe.set_stream_output_buffers = softpipe_set_stream_output_buffers; softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; + softpipe->pipe.set_index_buffer = softpipe_set_index_buffer; softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; + softpipe->pipe.draw_vbo = softpipe_draw_vbo; softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; softpipe->pipe.clear = softpipe_clear; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 53115a827d..c5f53cfa61 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -82,6 +82,7 @@ struct softpipe_context { struct pipe_sampler_view *geometry_sampler_views[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; struct { struct softpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; int offset[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 9e727c9381..2855f55a0e 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -35,6 +35,7 @@ #include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "sp_context.h" #include "sp_query.h" @@ -111,27 +112,19 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) * When the min/max element indexes aren't known, minIndex should be 0 * and maxIndex should be ~0. */ -static void -softpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) +void +softpipe_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; + void *mapped_indices = NULL; unsigned i; if (!softpipe_check_render_cond(sp)) return; - sp->reduced_api_prim = u_reduced_prim(mode); + sp->reduced_api_prim = u_reduced_prim(info->mode); if (sp->dirty) { softpipe_update_derived(sp); @@ -146,31 +139,27 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = softpipe_resource(indexBuffer)->data; - draw_set_mapped_element_buffer_range(draw, - indexSize, - indexBias, - minIndex, - maxIndex, - mapped_indexes); - } else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, - 0, 0, - start, - start + count - 1, - NULL); + if (info->indexed && sp->index_buffer.buffer) { + mapped_indices = softpipe_resource(sp->index_buffer.buffer)->data; + mapped_indices += sp->index_buffer.offset; } + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + sp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + /* draw! */ - draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); + draw_arrays_instanced(draw, info->mode, info->start, info->count, + info->start_instance, info->instance_count); /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } @@ -185,6 +174,49 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, sp->dirty_render_cache = TRUE; } +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = startInstance; + info.instance_count = instanceCount; + info.index_bias = indexBias; + info.min_index = minIndex; + info.max_index = maxIndex; + + if (indexBuffer) { + info.indexed = TRUE; + + saved_ib = sp->index_buffer; + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + softpipe_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + void softpipe_draw_range_elements(struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 7d6b86dce0..f04b0a5d31 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -221,6 +221,9 @@ void softpipe_set_vertex_buffers(struct pipe_context *, unsigned count, const struct pipe_vertex_buffer *); +void softpipe_set_index_buffer(struct pipe_context *, + const struct pipe_index_buffer *); + void softpipe_update_derived( struct softpipe_context *softpipe ); @@ -260,6 +263,10 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount); +void +softpipe_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); + void softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode); void diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 462f4d2655..880a7c7cd2 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -88,3 +88,17 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(softpipe->draw, count, buffers); } + +void +softpipe_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + if (ib) + memcpy(&softpipe->index_buffer, ib, sizeof(softpipe->index_buffer)); + else + memset(&softpipe->index_buffer, 0, sizeof(softpipe->index_buffer)); + + /* TODO make this more like a state */ +} diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 9a46de643f..67a7614c8a 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -190,6 +190,7 @@ struct svga_state struct svga_vertex_shader *vs; struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer ib; struct pipe_resource *cb[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 58e930d983..fceaa83d70 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -248,10 +248,34 @@ svga_draw_arrays( struct pipe_context *pipe, start, count); } +static void +svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct svga_context *svga = svga_context(pipe); + + if (info->indexed && svga->curr.ib.buffer) { + unsigned offset; + + assert(svga->curr.ib.offset % svga->curr.ib.index_size == 0); + offset = svga->curr.ib.offset / svga->curr.ib.index_size; + + svga_draw_range_elements(pipe, svga->curr.ib.buffer, + svga->curr.ib.index_size, info->index_bias, + info->min_index, info->max_index, + info->mode, info->start + offset, info->count); + } + else { + svga_draw_range_elements(pipe, NULL, 0, 0, + info->min_index, info->max_index, + info->mode, info->start, info->count); + } +} + void svga_init_draw_functions( struct svga_context *svga ) { svga->pipe.draw_arrays = svga_draw_arrays; svga->pipe.draw_elements = svga_draw_elements; svga->pipe.draw_range_elements = svga_draw_range_elements; + svga->pipe.draw_vbo = svga_draw_vbo; } diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 23808ad08e..86c79459f3 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -66,6 +66,24 @@ static void svga_set_vertex_buffers(struct pipe_context *pipe, } +static void svga_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct svga_context *svga = svga_context(pipe); + + if (ib) { + pipe_resource_reference(&svga->curr.ib.buffer, ib->buffer); + memcpy(&svga->curr.ib, ib, sizeof(svga->curr.ib)); + } + else { + pipe_resource_reference(&svga->curr.ib.buffer, NULL); + memset(&svga->curr.ib, 0, sizeof(svga->curr.ib)); + } + + /* TODO make this more like a state */ +} + + static void * svga_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, @@ -109,6 +127,7 @@ void svga_cleanup_vertex_state( struct svga_context *svga ) void svga_init_vertex_functions( struct svga_context *svga ) { svga->pipe.set_vertex_buffers = svga_set_vertex_buffers; + svga->pipe.set_index_buffer = svga_set_index_buffer; svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state; svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state; svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state; diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 55dd6cf883..91c9bf0999 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -167,6 +167,32 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, } +static INLINE void +trace_context_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "draw_vbo"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(bool, info->indexed); + trace_dump_arg(uint, info->mode); + trace_dump_arg(uint, info->start); + trace_dump_arg(uint, info->count); + trace_dump_arg(uint, info->start_instance); + trace_dump_arg(uint, info->instance_count); + trace_dump_arg(int, info->index_bias); + trace_dump_arg(uint, info->min_index); + trace_dump_arg(uint, info->max_index); + + pipe->draw_vbo(pipe, info); + + trace_dump_call_end(); +} + + static INLINE struct pipe_query * trace_context_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -1044,6 +1070,30 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, } +static INLINE void +trace_context_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = trace_resource_unwrap(tr_ctx, _ib->buffer); + ib = &unwrapped_ib; + } + + trace_dump_call_begin("pipe_context", "set_index_buffer"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(index_buffer, ib); + + pipe->set_index_buffer(pipe, ib); + + trace_dump_call_end(); +} + static INLINE void trace_context_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst, @@ -1436,6 +1486,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.draw_arrays = trace_context_draw_arrays; tr_ctx->base.draw_elements = trace_context_draw_elements; tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; + tr_ctx->base.draw_vbo = trace_context_draw_vbo; tr_ctx->base.create_query = trace_context_create_query; tr_ctx->base.destroy_query = trace_context_destroy_query; tr_ctx->base.begin_query = trace_context_begin_query; @@ -1477,6 +1528,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.create_sampler_view = trace_create_sampler_view; tr_ctx->base.sampler_view_destroy = trace_sampler_view_destroy; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; + tr_ctx->base.set_index_buffer = trace_context_set_index_buffer; tr_ctx->base.resource_copy_region = trace_context_resource_copy_region; tr_ctx->base.clear = trace_context_clear; tr_ctx->base.clear_render_target = trace_context_clear_render_target; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 1727c2a020..bd9a9bfaf1 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -533,6 +533,26 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) } +void trace_dump_index_buffer(const struct pipe_index_buffer *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_index_buffer"); + + trace_dump_member(uint, state, index_size); + trace_dump_member(uint, state, offset); + trace_dump_member(resource_ptr, state, buffer); + + trace_dump_struct_end(); +} + + void trace_dump_vertex_element(const struct pipe_vertex_element *state) { if (!trace_dumping_enabled_locked()) diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index e614e8355e..2e70f4e1c7 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -75,6 +75,8 @@ void trace_dump_transfer(const struct pipe_transfer *state); void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); +void trace_dump_index_buffer(const struct pipe_index_buffer *state); + void trace_dump_vertex_element(const struct pipe_vertex_element *state); -- cgit v1.2.3 From cd3ef7592cc9e2c83b175a8652c0153c578fb46b Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 16 Jul 2010 04:21:11 +0800 Subject: gallium: Use unified pipe_context::draw_vbo. Update u_draw_quad, st/vega, and st/mesa to use pipe_context::draw_vbo. --- src/gallium/auxiliary/util/u_draw_quad.c | 2 +- src/gallium/state_trackers/vega/polygon.c | 3 +- src/mesa/state_tracker/st_draw.c | 169 ++++++++++++------------------ 3 files changed, 71 insertions(+), 103 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index b37b48b5ae..0b6dc5880f 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -60,7 +60,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* note: vertex elements already set by caller */ /* draw */ - pipe->draw_arrays(pipe, prim_type, 0, num_verts); + util_draw_arrays(pipe, prim_type, 0, num_verts); } diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c index e9c8f03137..bc94170eb9 100644 --- a/src/gallium/state_trackers/vega/polygon.c +++ b/src/gallium/state_trackers/vega/polygon.c @@ -301,8 +301,7 @@ static void draw_polygon(struct vg_context *ctx, cso_set_vertex_elements(ctx->cso_context, 1, &velement); /* draw */ - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, - 0, poly->num_verts); + util_draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, 0, (uint) poly->num_verts); } void polygon_fill(struct polygon *poly, struct vg_context *ctx) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 5821da4889..5b05489270 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -58,6 +58,7 @@ #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "draw/draw_context.h" #include "cso_cache/cso_context.h" @@ -494,6 +495,49 @@ setup_non_interleaved_attribs(GLcontext *ctx, } +static void +setup_index_buffer(GLcontext *ctx, + const struct _mesa_index_buffer *ib, + struct pipe_index_buffer *ibuffer) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + + memset(ibuffer, 0, sizeof(*ibuffer)); + if (ib) { + struct gl_buffer_object *bufobj = ib->obj; + + switch (ib->type) { + case GL_UNSIGNED_INT: + ibuffer->index_size = 4; + break; + case GL_UNSIGNED_SHORT: + ibuffer->index_size = 2; + break; + case GL_UNSIGNED_BYTE: + ibuffer->index_size = 1; + break; + default: + assert(0); + return; + } + + /* get/create the index buffer object */ + if (bufobj && bufobj->Name) { + /* elements/indexes are in a real VBO */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + pipe_resource_reference(&ibuffer->buffer, stobj->buffer); + ibuffer->offset = pointer_to_offset(ib->ptr); + } + else { + /* element/indicies are in user space memory */ + ibuffer->buffer = + pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, + ib->count * ibuffer->index_size, + PIPE_BIND_INDEX_BUFFER); + } + } +} /** * Prior to drawing, check that any uniforms referenced by the @@ -568,8 +612,11 @@ st_draw_vbo(GLcontext *ctx, GLuint attr; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; unsigned num_vbuffers, num_velements; + struct pipe_index_buffer ibuffer; GLboolean userSpace = GL_FALSE; GLboolean vertDataEdgeFlags; + struct pipe_draw_info info; + unsigned i; /* Mesa core state should have been validated already */ assert(ctx->NewState == 0x0); @@ -647,113 +694,35 @@ st_draw_vbo(GLcontext *ctx, if (num_vbuffers == 0 || num_velements == 0) return; - /* do actual drawing */ - if (ib) { - /* indexed primitive */ - struct gl_buffer_object *bufobj = ib->obj; - struct pipe_resource *indexBuf = NULL; - unsigned indexSize, indexOffset, i; + setup_index_buffer(ctx, ib, &ibuffer); + pipe->set_index_buffer(pipe, &ibuffer); - switch (ib->type) { - case GL_UNSIGNED_INT: - indexSize = 4; - break; - case GL_UNSIGNED_SHORT: - indexSize = 2; - break; - case GL_UNSIGNED_BYTE: - indexSize = 1; - break; - default: - assert(0); - return; - } - - /* get/create the index buffer object */ - if (bufobj && bufobj->Name) { - /* elements/indexes are in a real VBO */ - struct st_buffer_object *stobj = st_buffer_object(bufobj); - pipe_resource_reference(&indexBuf, stobj->buffer); - indexOffset = pointer_to_offset(ib->ptr) / indexSize; - } - else { - /* element/indicies are in user space memory */ - indexBuf = pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, - ib->count * indexSize, - PIPE_BIND_INDEX_BUFFER); - indexOffset = 0; + util_draw_init_info(&info); + if (ib) { + info.indexed = TRUE; + if (min_index != ~0 && max_index != ~0) { + info.min_index = min_index; + info.max_index = max_index; } + } - /* draw */ - if (pipe->draw_range_elements && min_index != ~0 && max_index != ~0) { - /* XXX: exercise temporary path to pass min/max directly - * through to driver & draw module. These interfaces still - * need a bit of work... - */ - for (i = 0; i < nr_prims; i++) { - unsigned vcount = prims[i].count; - unsigned prim = translate_prim(ctx, prims[i].mode); - - if (u_trim_pipe_prim(prims[i].mode, &vcount)) { - pipe->draw_range_elements(pipe, indexBuf, indexSize, - prims[i].basevertex, - min_index, max_index, prim, - prims[i].start + indexOffset, vcount); - } - } - } - else { - for (i = 0; i < nr_prims; i++) { - unsigned vcount = prims[i].count; - unsigned prim = translate_prim(ctx, prims[i].mode); - - if (u_trim_pipe_prim(prims[i].mode, &vcount)) { - if (prims[i].num_instances == 1) { - pipe->draw_elements(pipe, indexBuf, - indexSize, - prims[i].basevertex, - prim, - prims[i].start + indexOffset, - vcount); - } - else { - pipe->draw_elements_instanced(pipe, indexBuf, - indexSize, - prims[i].basevertex, - prim, - prims[i].start + indexOffset, - vcount, - 0, /* startInstance */ - prims[i].num_instances); - } - } - } + /* do actual drawing */ + for (i = 0; i < nr_prims; i++) { + info.mode = translate_prim( ctx, prims[i].mode ); + info.start = prims[i].start; + info.count = prims[i].count; + info.instance_count = prims[i].num_instances; + info.index_bias = prims[i].basevertex; + if (!ib) { + info.min_index = info.start; + info.max_index = info.start + info.count - 1; } - pipe_resource_reference(&indexBuf, NULL); + if (u_trim_pipe_prim(info.mode, &info.count)) + pipe->draw_vbo(pipe, &info); } - else { - /* non-indexed */ - GLuint i; - - for (i = 0; i < nr_prims; i++) { - unsigned vcount = prims[i].count; - unsigned prim = translate_prim(ctx, prims[i].mode); - if (u_trim_pipe_prim(prims[i].mode, &vcount)) { - if (prims[i].num_instances == 1) { - pipe->draw_arrays(pipe, prim, prims[i].start, vcount); - } - else { - pipe->draw_arrays_instanced(pipe, prim, - prims[i].start, - vcount, - 0, /* startInstance */ - prims[i].num_instances); - } - } - } - } + pipe_resource_reference(&ibuffer.buffer, NULL); /* unreference buffers (frees wrapped user-space buffer objects) */ for (attr = 0; attr < num_vbuffers; attr++) { -- cgit v1.2.3 From 38f5b1bc38fde041162e90e0ba3955ac661e1abf Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Jul 2010 16:53:39 +0100 Subject: util: Don't include xmmintrin.h. Unnecessary. --- src/gallium/auxiliary/util/u_sse.h | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index e2a8491e62..6145e34aa3 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -41,7 +41,6 @@ #if defined(PIPE_ARCH_SSE) -#include #include -- cgit v1.2.3 From e3d2ebac115f7b7899664fefc2652fb829acfa27 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Jul 2010 18:37:46 +0100 Subject: llvmpipe: Avoid corrupting the FPU stack with MMX instructions on 32bit OSes. Unfortunately LLVM doesn't emit EMMS itself, and there is no easy/effective way to disable MMX. http://llvm.org/bugs/show_bug.cgi?id=3287 --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 24 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 5 +++++ 2 files changed, 29 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 5a9488b5f7..072408b268 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include "pipe/p_config.h" #include "util/u_debug.h" @@ -141,4 +142,27 @@ lp_set_target_options(void) #if 0 llvm::UnsafeFPMath = true; #endif + +#if 0 + /* + * LLVM will generate MMX instructions for vectors <= 64 bits, leading to + * innefficient code, and in 32bit systems, to the corruption of the FPU + * stack given that it expects the user to generate the EMMS instructions. + * + * See also: + * - http://llvm.org/bugs/show_bug.cgi?id=3287 + * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/ + * + * XXX: Unfortunately this is not working. + */ + static boolean first = FALSE; + if (first) { + static const char* options[] = { + "prog", + "-disable-mmx" + }; + llvm::cl::ParseCommandLineOptions(2, const_cast(options)); + first = FALSE; + } +#endif } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 5953d690a4..dbcc286417 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -676,6 +676,11 @@ generate_fragment(struct llvmpipe_context *lp, color_ptr); } +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); -- cgit v1.2.3 From 111902f2c47377f7d7ea41af6a2a29a087350f17 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 26 Jul 2010 12:16:45 +0100 Subject: draw: Also emit EMMS on generated LLVM IR. --- src/gallium/auxiliary/draw/draw_llvm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 19f96c37ab..48489e5f6f 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -37,6 +37,7 @@ #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_tgsi.h" #include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_intr.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" @@ -793,6 +794,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) sampler->destroy(sampler); +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -963,6 +969,11 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian sampler->destroy(sampler); +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); -- cgit v1.2.3 From bdaa8be5b7dce396dd98f05e7ad66d48633cc4cb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 20 Jul 2010 13:40:01 +0100 Subject: scons: Use the current python executable for code generation. Less susceptible to be broken. --- src/gallium/auxiliary/SConscript | 4 ++-- src/gallium/drivers/llvmpipe/SConscript | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 72a16617db..8381ae5b3e 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -34,14 +34,14 @@ env.CodeGenerate( target = 'util/u_format_table.c', script = '#src/gallium/auxiliary/util/u_format_table.py', source = ['#src/gallium/auxiliary/util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' ) env.CodeGenerate( target = 'util/u_half.c', script = 'util/u_half.py', source = [], - command = 'python $SCRIPT > $TARGET' + command = python_cmd + ' $SCRIPT > $TARGET' ) env.Depends('util/u_format_table.c', [ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index fd6ba1561e..5583fca38e 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -1,3 +1,4 @@ +from sys import executable as python_cmd import distutils.version Import('*') @@ -16,7 +17,7 @@ env.CodeGenerate( target = 'lp_tile_soa.c', script = 'lp_tile_soa.py', source = ['#src/gallium/auxiliary/util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' ) # XXX: Our dependency scanner only finds depended modules in relative dirs. -- cgit v1.2.3 From a258701cd949d45041ed571ca08fedc40de2cf69 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 16 Jul 2010 12:50:01 +0100 Subject: util: add uint version of pack_z_stencil Useful for packing mask values. --- src/gallium/auxiliary/util/u_pack_color.h | 47 +++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 3ebef9fb74..5f113f742b 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -425,6 +425,53 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color * } } +/* Integer versions of util_pack_z and util_pack_z_stencil - useful for + * constructing clear masks. + */ +static INLINE uint +util_pack_uint_z(enum pipe_format format, unsigned z) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return z & 0xffff; + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + return z; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_Z24X8_UNORM: + return z & 0xffffff; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return (z & 0xffffff) << 8; + case PIPE_FORMAT_S8_USCALED: + return 0; + default: + debug_print_format("gallium: unhandled format in util_pack_z()", format); + assert(0); + return 0; + } +} + +static INLINE uint +util_pack_uint_z_stencil(enum pipe_format format, double z, uint s) +{ + unsigned packed = util_pack_uint_z(format, z); + + s &= 0xff; + + switch (format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return packed | (s << 24); + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return packed | s; + case PIPE_FORMAT_S8_USCALED: + return packed | s; + default: + return packed; + } +} + + /** * Note: it's assumed that z is in [0,1] -- cgit v1.2.3 From 5f90e76c54bbf4456c977b3cbca450d7a570179e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 19:18:48 +0100 Subject: gallivm: fix lp_build_sample_offset() crash when indexing a 1-D texture If y==NULL and y_stride==NULL it means the texture is 1D. Return zero for out_i and the offset instead of garbage. --- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 0fd014ab9b..655c4fb901 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -179,6 +179,9 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); offset = lp_build_add(bld, offset, y_offset); } + else { + *out_j = bld->zero; + } if (z && z_stride) { LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); -- cgit v1.2.3 From 02da55676bd483df5e8540e079f53c7f41178025 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 29 Jul 2010 19:38:02 +0100 Subject: Revert "gallivm: fix lp_build_sample_offset() crash when indexing a 1-D texture" This reverts commit 5f90e76c54bbf4456c977b3cbca450d7a570179e. Bad cherry-pick. --- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 655c4fb901..0fd014ab9b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -179,9 +179,6 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); offset = lp_build_add(bld, offset, y_offset); } - else { - *out_j = bld->zero; - } if (z && z_stride) { LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); -- cgit v1.2.3 From 8f3fe7e2f0a3ce1a5c45fd204b0105f3b501e641 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 12:03:33 -0600 Subject: gallivm: added lp_build_assert() function to make assertions in LLVM code --- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/SConscript | 1 + src/gallium/auxiliary/gallivm/lp_bld_assert.c | 101 ++++++++++++++++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_assert.h | 41 +++++++++++ 4 files changed, 144 insertions(+) create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_assert.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_assert.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index dcebab7c0f..843b72bc38 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -149,6 +149,7 @@ C_SOURCES = \ GALLIVM_SOURCES = \ gallivm/lp_bld_arit.c \ + gallivm/lp_bld_assert.c \ gallivm/lp_bld_const.c \ gallivm/lp_bld_conv.c \ gallivm/lp_bld_debug.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 8381ae5b3e..1f09198721 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -198,6 +198,7 @@ source = [ if env['llvm']: source += [ 'gallivm/lp_bld_arit.c', + 'gallivm/lp_bld_assert.c', 'gallivm/lp_bld_const.c', 'gallivm/lp_bld_conv.c', 'gallivm/lp_bld_debug.c', diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c new file mode 100644 index 0000000000..f2ebd868a8 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "lp_bld_assert.h" +#include "lp_bld_init.h" +#include "lp_bld_printf.h" + + +/** + * A call to lp_build_assert() will build a function call to this function. + */ +static void +lp_assert(int condition, const char *msg) +{ + if (!condition) { + debug_printf("LLVM assertion '%s' failed!\n", msg); + assert(condition); + } +} + + + +/** + * lp_build_assert. + * + * Build an assertion in LLVM IR by building a function call to the + * lp_assert() function above. + * + * \param condition should be an 'i1' or 'i32' value + * \param msg a string to print if the assertion fails. + */ +LLVMValueRef +lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, + const char *msg) +{ + LLVMModuleRef module; + LLVMTypeRef arg_types[2]; + LLVMValueRef msg_string, assert_func, params[2], r; + + module = LLVMGetGlobalParent(LLVMGetBasicBlockParent( + LLVMGetInsertBlock(builder))); + + msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1); + + arg_types[0] = LLVMInt32Type(); + arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); + + /* lookup the lp_assert function */ + assert_func = LLVMGetNamedFunction(module, "lp_assert"); + + /* Create the assertion function if not found */ + if (!assert_func) { + LLVMTypeRef func_type = + LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0); + + assert_func = LLVMAddFunction(module, "lp_assert", func_type); + LLVMSetFunctionCallConv(assert_func, LLVMCCallConv); + LLVMSetLinkage(assert_func, LLVMExternalLinkage); + LLVMAddGlobalMapping(lp_build_engine, assert_func, + func_to_pointer((func_pointer)lp_assert)); + } + assert(assert_func); + + /* build function call param list */ + params[0] = LLVMBuildZExt(builder, condition, arg_types[0], ""); + params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], ""); + + /* check arg types */ + assert(LLVMTypeOf(params[0]) == arg_types[0]); + assert(LLVMTypeOf(params[1]) == arg_types[1]); + + r = LLVMBuildCall(builder, assert_func, params, 2, ""); + + return r; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h new file mode 100644 index 0000000000..ddd879dc2c --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_ASSERT_H +#define LP_BLD_ASSERT_H + + +#include "lp_bld.h" + + +LLVMValueRef +lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, + const char *msg); + + +#endif + -- cgit v1.2.3 From 8a2933f3663177f32f5ee45bb696463b8549dcbb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 13:44:44 -0600 Subject: draw: add vertex buffer offset in draw_print_arrays() --- src/gallium/auxiliary/draw/draw_pt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 92d4113b4c..adef26a167 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -259,6 +259,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) for (j = 0; j < draw->pt.nr_vertex_elements; j++) { uint buf = draw->pt.vertex_element[j].vertex_buffer_index; ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; + ptr += draw->pt.vertex_buffer[buf].buffer_offset; ptr += draw->pt.vertex_buffer[buf].stride * ii; ptr += draw->pt.vertex_element[j].src_offset; -- cgit v1.2.3 From ba2cc3b8e6ad161181b67fd2575c6bc768584d23 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 13:49:21 -0600 Subject: gallium: implement bounds checking for constant buffers Plumb the constant buffer sizes down into the tgsi interpreter where we can do bounds checking. Optional debug code warns upon out-of-bounds reading. Plus add a few other assertions in the TGSI interpreter. --- src/gallium/auxiliary/draw/draw_context.c | 11 ++++- src/gallium/auxiliary/draw/draw_gs.c | 13 ++++-- src/gallium/auxiliary/draw/draw_gs.h | 1 + src/gallium/auxiliary/draw/draw_private.h | 7 ++- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 4 ++ .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 1 + src/gallium/auxiliary/draw/draw_vs.c | 16 ++++++- src/gallium/auxiliary/draw/draw_vs.h | 3 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 8 ++-- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 + src/gallium/auxiliary/draw/draw_vs_varient.c | 6 ++- src/gallium/auxiliary/tgsi/tgsi_exec.c | 53 +++++++++++++++++++--- src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 ++++ src/gallium/drivers/softpipe/sp_context.h | 1 + src/gallium/drivers/softpipe/sp_quad_fs.c | 7 +-- src/gallium/drivers/softpipe/sp_state_fs.c | 2 + 16 files changed, 117 insertions(+), 27 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index c127f74188..995b675b9a 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -288,12 +288,19 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, shader_type == PIPE_SHADER_GEOMETRY); debug_assert(slot < PIPE_MAX_CONSTANT_BUFFERS); - if (shader_type == PIPE_SHADER_VERTEX) { + switch (shader_type) { + case PIPE_SHADER_VERTEX: draw->pt.user.vs_constants[slot] = buffer; + draw->pt.user.vs_constants_size[slot] = size; draw_vs_set_constants(draw, slot, buffer, size); - } else if (shader_type == PIPE_SHADER_GEOMETRY) { + break; + case PIPE_SHADER_GEOMETRY: draw->pt.user.gs_constants[slot] = buffer; + draw->pt.user.gs_constants_size[slot] = size; draw_gs_set_constants(draw, slot, buffer, size); + break; + default: + assert(0 && "invalid shader type in draw_set_mapped_constant_buffer"); } } diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 0c590f936b..a36321d910 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -75,7 +75,7 @@ draw_gs_set_constants(struct draw_context *draw, const void *constants, unsigned size) { - /* noop */ + debug_printf("draw_gs_set_constants() not implemented yet!\n"); } @@ -394,8 +394,13 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, const ushort *elts = input_prims->elts; #include "draw_gs_tmp.h" + +/** + * Execute geometry shader using TGSI interpreter. + */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, struct draw_vertex_info *output_verts, @@ -405,7 +410,6 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, unsigned input_stride = input_verts->vertex_size; unsigned vertex_size = input_verts->vertex_size; struct tgsi_exec_machine *machine = shader->machine; - unsigned int i; unsigned num_input_verts = input_prim->linear ? input_verts->count : input_prim->count; @@ -447,9 +451,8 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, } shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = constants[i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + constants, constants_size); if (input_prim->linear) gs_run(shader, input_prim, input_verts, diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 06f4b822a2..67bc1aa73f 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -73,6 +73,7 @@ struct draw_geometry_shader { */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, struct draw_vertex_info *output_verts, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 058aeedc17..397d4bf653 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -163,9 +163,11 @@ struct draw_context /** vertex arrays */ const void *vbuffer[PIPE_MAX_ATTRIBS]; - /** constant buffer (for vertex/geometry shader) */ + /** constant buffers (for vertex/geometry shader) */ const void *vs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ @@ -198,6 +200,7 @@ struct draw_context struct pipe_viewport_state viewport; boolean identity_viewport; + /** Vertex shader state */ struct { struct draw_vertex_shader *vertex_shader; uint num_vs_outputs; /**< convenience, from vertex_shader */ @@ -227,6 +230,7 @@ struct draw_context struct translate_cache *emit_cache; } vs; + /** Geometry shader state */ struct { struct draw_geometry_shader *geometry_shader; uint num_gs_outputs; /**< convenience, from geometry_shader */ @@ -239,6 +243,7 @@ struct draw_context struct tgsi_sampler **samplers; } gs; + /** Stream output (vertex feedback) state */ struct { struct pipe_stream_output_state state; void *buffers[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 121dfc414a..5b16c3788e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -176,6 +176,7 @@ static void emit(struct pt_emit *emit, static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, struct draw_vertex_info *output_verts ) { @@ -190,6 +191,7 @@ static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, (const float (*)[4])input_verts->verts->data, ( float (*)[4])output_verts->verts->data, constants, + const_size, input_verts->count, input_verts->vertex_size, input_verts->vertex_size); @@ -236,6 +238,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, if (fpme->opt & PT_SHADE) { draw_vertex_shader_run(vshader, draw->pt.user.vs_constants, + draw->pt.user.vs_constants_size, vert_info, &vs_vert_info); @@ -246,6 +249,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, if ((fpme->opt & PT_SHADE) && gshader) { draw_geometry_shader_run(gshader, draw->pt.user.gs_constants, + draw->pt.user.gs_constants_size, vert_info, prim_info, &gs_vert_info, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 5c9db12086..4b99bee86a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -254,6 +254,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, if ((opt & PT_SHADE) && gshader) { draw_geometry_shader_run(gshader, draw->pt.user.gs_constants, + draw->pt.user.gs_constants_size, vert_info, prim_info, &gs_vert_info, diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 57ea63fc06..fb665b08ff 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -48,18 +48,30 @@ DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE) + +/** + * Set a vertex shader constant buffer. + * \param slot which constant buffer in [0, PIPE_MAX_CONSTANT_BUFFERS-1] + * \param constants the mapped buffer + * \param size size of buffer in bytes + */ void draw_vs_set_constants(struct draw_context *draw, unsigned slot, const void *constants, unsigned size) { - if (((uintptr_t)constants) & 0xf) { + const int alignment = 16; + + /* check if buffer is 16-byte aligned */ + if (((uintptr_t)constants) & (alignment - 1)) { + /* if not, copy the constants into a new, 16-byte aligned buffer */ if (size > draw->vs.const_storage_size[slot]) { if (draw->vs.aligned_constant_storage[slot]) { align_free((void *)draw->vs.aligned_constant_storage[slot]); } - draw->vs.aligned_constant_storage[slot] = align_malloc(size, 16); + draw->vs.aligned_constant_storage[slot] = + align_malloc(size, alignment); } assert(constants); memcpy((void *)draw->vs.aligned_constant_storage[slot], diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index a731994523..f9a038788f 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -133,7 +133,8 @@ struct draw_vertex_shader { void (*run_linear)( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index bc34d390da..dab3eb1ca8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -85,7 +85,8 @@ static void vs_exec_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -95,9 +96,8 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, unsigned int i, j; unsigned slot; - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = constants[i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + constants, const_size); for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 6c13df7913..d13ad24fff 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -49,6 +49,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 6eb26927f2..eacd160187 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -149,7 +149,8 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants_size, count, temp_vertex_stride, temp_vertex_stride); @@ -214,7 +215,8 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants_size, count, temp_vertex_stride, temp_vertex_stride); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 5275faa5e2..6fcbf4f212 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -557,6 +557,23 @@ print_temp(const struct tgsi_exec_machine *mach, uint index) #endif +void +tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, + unsigned num_bufs, + const void **bufs, + const unsigned *buf_sizes) +{ + unsigned i; + + for (i = 0; i < num_bufs; i++) { + mach->Consts[i] = bufs[i]; + mach->ConstsSize[i] = buf_sizes[i]; + } +} + + + + /** * Check if there's a potential src/dst register data dependency when * using SOA execution. @@ -632,6 +649,11 @@ tgsi_exec_machine_bind_shader( util_init_math(); + if (numSamplers) { + assert(samplers); + assert(samplers[0]); + } + mach->Tokens = tokens; mach->Samplers = samplers; @@ -1040,6 +1062,8 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, { uint i; + assert(swizzle < 4); + switch (file) { case TGSI_FILE_CONSTANT: for (i = 0; i < QUAD_SIZE; i++) { @@ -1049,9 +1073,23 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, if (index->i[i] < 0) { chan->u[i] = 0; } else { - const uint *p = (const uint *)mach->Consts[index2D->i[i]]; - - chan->u[i] = p[index->i[i] * 4 + swizzle]; + /* NOTE: copying the const value as a uint instead of float */ + const uint constbuf = index2D->i[i]; + const uint *buf = (const uint *)mach->Consts[constbuf]; + const int pos = index->i[i] * 4 + swizzle; + /* const buffer bounds check */ + if (pos < 0 || pos >= mach->ConstsSize[constbuf]) { + if (0) { + /* Debug: print warning */ + static int count = 0; + if (count++ < 100) + debug_printf("TGSI Exec: const buffer index %d" + " out of bounds\n", pos); + } + chan->u[i] = 0; + } + else + chan->u[i] = buf[pos]; } } break; @@ -1065,9 +1103,10 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], index2D->i[i], index->i[i]); }*/ - chan->u[i] = mach->Inputs[index2D->i[i] * - TGSI_EXEC_MAX_INPUT_ATTRIBS + - index->i[i]].xyzw[swizzle].u[i]; + int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; + assert(pos >= 0); + assert(pos < Elements(mach->Inputs)); + chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; } break; @@ -1187,7 +1226,7 @@ fetch_source(const struct tgsi_exec_machine *mach, index2.i[1] = index2.i[2] = index2.i[3] = reg->Indirect.Index; - + assert(reg->Indirect.File == TGSI_FILE_ADDRESS); /* get current value of address register[swizzle] */ swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); fetch_src_file_channel(mach, diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index ccf80ca6fd..6dee362d58 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -253,7 +253,10 @@ struct tgsi_exec_machine struct tgsi_sampler **Samplers; unsigned ImmLimit; + const void *Consts[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS]; + const struct tgsi_token *Tokens; /**< Declarations, instructions */ unsigned Processor; /**< TGSI_PROCESSOR_x */ @@ -367,6 +370,13 @@ tgsi_set_exec_mask(struct tgsi_exec_machine *mach, } +extern void +tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, + unsigned num_bufs, + const void **bufs, + const unsigned *buf_sizes); + + #if defined __cplusplus } /* extern "C" */ #endif diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index c5f53cfa61..9361a3df09 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -112,6 +112,7 @@ struct softpipe_context { /** Mapped constant buffers */ const void *mapped_constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; + unsigned const_buffer_size[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; /** Vertex format */ struct vertex_info vertex_info; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index d240bcbf3b..90f4787d59 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -111,9 +111,10 @@ shade_quads(struct quad_stage *qs, struct tgsi_exec_machine *machine = softpipe->fs_machine; unsigned i, nr_quads = 0; - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT][i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], + softpipe->const_buffer_size[PIPE_SHADER_FRAGMENT]); + machine->InterpCoefs = quads[0]->coef; for (i = 0; i < nr; i++) { diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 3fbf1f2578..ded242d3dc 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -195,6 +195,8 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, } softpipe->mapped_constants[shader][index] = data; + softpipe->const_buffer_size[shader][index] = size; + softpipe->dirty |= SP_NEW_CONSTANTS; } -- cgit v1.2.3 From d88b6e19c14900f4cad94cf7a28d159369463108 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 14:09:08 -0600 Subject: draw: assorted clean-ups in clipper code --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 1cf6ee7a7f..8a3d499feb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -68,8 +68,7 @@ struct clip_stage { }; -/* This is a bit confusing: - */ +/** Cast wrapper */ static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) { return (struct clip_stage *)stage; @@ -81,18 +80,22 @@ static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) /* All attributes are float[4], so this is easy: */ -static void interp_attr( float *fdst, +static void interp_attr( float dst[4], float t, - const float *fin, - const float *fout ) + const float in[4], + const float out[4] ) { - fdst[0] = LINTERP( t, fout[0], fin[0] ); - fdst[1] = LINTERP( t, fout[1], fin[1] ); - fdst[2] = LINTERP( t, fout[2], fin[2] ); - fdst[3] = LINTERP( t, fout[3], fin[3] ); + dst[0] = LINTERP( t, out[0], in[0] ); + dst[1] = LINTERP( t, out[1], in[1] ); + dst[2] = LINTERP( t, out[2], in[2] ); + dst[3] = LINTERP( t, out[3], in[3] ); } +/** + * Copy front/back, primary/secondary colors from src vertex to dst vertex. + * Used when flat shading. + */ static void copy_colors( struct draw_stage *stage, struct vertex_header *dst, const struct vertex_header *src ) @@ -121,20 +124,17 @@ static void interp( const struct clip_stage *clip, /* Vertex header. */ - { - dst->clipmask = 0; - dst->edgeflag = 0; /* will get overwritten later */ - dst->pad = 0; - dst->vertex_id = UNDEFINED_VERTEX_ID; - } + dst->clipmask = 0; + dst->edgeflag = 0; /* will get overwritten later */ + dst->pad = 0; + dst->vertex_id = UNDEFINED_VERTEX_ID; - /* Clip coordinates: interpolate normally + /* Interpolate the clip-space coords. */ - { - interp_attr(dst->clip, t, in->clip, out->clip); - } + interp_attr(dst->clip, t, in->clip, out->clip); - /* Do the projective divide and insert window coordinates: + /* Do the projective divide and viewport transformation to get + * new window coordinates: */ { const float *pos = dst->clip; -- cgit v1.2.3 From b4c8de1ff24d4d5e2fe550da54249934320acab4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 17:24:20 -0600 Subject: draw: do bounds checking of array elements (debug only) Make sure that all the element indexes actually lie inside the vertex buffer. Also, rename pipe_run() to pipe_run_elts() to be more specific. And assert/check the vertex count for the non-indexed case. --- src/gallium/auxiliary/draw/draw_pipe.c | 35 +++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 8cd75ecf9a..144f10a5d0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -220,7 +220,7 @@ static void do_triangle( struct draw_context *draw, do_point( draw, \ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) -#define FUNC pipe_run +#define FUNC pipe_run_elts #define ARGS \ struct draw_context *draw, \ unsigned prim, \ @@ -269,14 +269,29 @@ void draw_pipeline_run( struct draw_context *draw, i < prim_info->primitive_count; start += prim_info->primitive_lengths[i], i++) { - unsigned count = prim_info->primitive_lengths[i]; - - pipe_run(draw, - prim_info->prim, - vert_info->verts, - vert_info->stride, - prim_info->elts + start, - count); + const unsigned count = prim_info->primitive_lengths[i]; + +#if DEBUG + /* make sure none of the element indexes go outside the vertex buffer */ + { + unsigned max_index = 0x0, i; + /* find the largest element index */ + for (i = 0; i < count; i++) { + unsigned int index = (prim_info->elts[start + i] + & ~DRAW_PIPE_FLAG_MASK); + if (index > max_index) + max_index = index; + } + assert(max_index <= vert_info->count); + } +#endif + + pipe_run_elts(draw, + prim_info->prim, + vert_info->verts, + vert_info->stride, + prim_info->elts + start, + count); } draw->pipeline.verts = NULL; @@ -378,6 +393,8 @@ void draw_pipeline_run_linear( struct draw_context *draw, draw->pipeline.vertex_stride = vert_info->stride; draw->pipeline.vertex_count = count; + assert(count <= vert_info->count); + pipe_run_linear(draw, prim_info->prim, (struct vertex_header*)verts, -- cgit v1.2.3 From f623d06c495c671f687d4a70c9281c64e5875232 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 30 Jul 2010 10:47:20 +0100 Subject: util: more helpers for old draw code --- src/gallium/auxiliary/util/u_draw.h | 140 +++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_draw_quad.h | 26 +----- 2 files changed, 141 insertions(+), 25 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_draw.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h new file mode 100644 index 0000000000..b901f97008 --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw.h @@ -0,0 +1,140 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_DRAW_H +#define U_DRAW_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" + + +static INLINE void +util_draw_init_info(struct pipe_draw_info *info) +{ + memset(info, 0, sizeof(*info)); + info->instance_count = 1; + info->max_index = 0xffffffff; +} + + +static INLINE void +util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_elements(struct pipe_context *pipe, int index_bias, + uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.indexed = TRUE; + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_arrays_instanced(struct pipe_context *pipe, + uint mode, uint start, uint count, + uint start_instance, + uint instance_count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = start_instance; + info.instance_count = instance_count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_elements_instanced(struct pipe_context *pipe, + int index_bias, + uint mode, uint start, uint count, + uint start_instance, + uint instance_count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.indexed = TRUE; + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + info.start_instance = start_instance; + info.instance_count = instance_count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_range_elements(struct pipe_context *pipe, + int index_bias, + uint min_index, + uint max_index, + uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.indexed = TRUE; + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + info.min_index = min_index; + info.max_index = max_index; + + pipe->draw_vbo(pipe, &info); +} + +#endif diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 1c9f752611..52994fe05c 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -39,31 +39,7 @@ extern "C" { struct pipe_resource; - -static INLINE void -util_draw_init_info(struct pipe_draw_info *info) -{ - memset(info, 0, sizeof(*info)); - info->instance_count = 1; - info->max_index = 0xffffffff; -} - - -static INLINE void -util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count) -{ - struct pipe_draw_info info; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.min_index = start; - info.max_index = start + count - 1; - - pipe->draw_vbo(pipe, &info); -} - +#include "util/u_draw.h" extern void util_draw_vertex_buffer(struct pipe_context *pipe, -- cgit v1.2.3 From 042018a943a403a4d9887b400deb3b3c83ee40c0 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 27 Jul 2010 12:26:54 -0400 Subject: llvmpipe: delete function bodies after generating machine code --- src/gallium/auxiliary/draw/draw_llvm.c | 2 ++ src/gallium/auxiliary/gallivm/lp_bld_init.h | 2 ++ src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 8 ++++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 1 + 4 files changed, 13 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 48489e5f6f..8022b720b3 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -826,6 +826,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function); } @@ -1001,6 +1002,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function_elts); } void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index a32ced9b4c..f26fdac466 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -44,5 +44,7 @@ extern LLVMPassManagerRef lp_build_pass; void lp_build_init(void); +extern void +lp_func_delete_body(LLVMValueRef func); #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 072408b268..6d5410d970 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -166,3 +166,11 @@ lp_set_target_options(void) } #endif } + + +extern "C" void +lp_func_delete_body(LLVMValueRef FF) +{ + llvm::Function *func = llvm::unwrap(FF); + func->deleteBody(); +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index dbcc286417..5ee5bde184 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -715,6 +715,7 @@ generate_fragment(struct llvmpipe_context *lp, if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); } + lp_func_delete_body(function); } } -- cgit v1.2.3 From 0e7d7d3051d125621b2b79c11f577fc3a3cc7b2c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 30 Jul 2010 23:44:16 +0800 Subject: util: Fix the range of util_draw_elements_instanced. Keep min_index and max_index at their defaults (0 and ~0). --- src/gallium/auxiliary/util/u_draw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h index b901f97008..2a91ea0f9a 100644 --- a/src/gallium/auxiliary/util/u_draw.h +++ b/src/gallium/auxiliary/util/u_draw.h @@ -110,8 +110,6 @@ util_draw_elements_instanced(struct pipe_context *pipe, info.index_bias = index_bias; info.start_instance = start_instance; info.instance_count = instance_count; - info.min_index = start; - info.max_index = start + count - 1; pipe->draw_vbo(pipe, &info); } -- cgit v1.2.3 From 078eff659a7ef90691966d983f35ed9e4ce63901 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 30 Jul 2010 11:48:48 -0700 Subject: llvmpipe: Fix implicit declaration of lp_func_delete_body warnings. --- src/gallium/auxiliary/draw/draw_llvm.c | 1 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8022b720b3..de99b00a81 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -38,6 +38,7 @@ #include "gallivm/lp_bld_tgsi.h" #include "gallivm/lp_bld_printf.h" #include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_init.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 5ee5bde184..dbca49a2ef 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -75,6 +75,7 @@ #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_tgsi.h" -- cgit v1.2.3 From ce1fed1659ee0af063c6bace8a1aba879b85e775 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 30 Jul 2010 13:28:42 -0600 Subject: tgsi: remove incorrect assertion --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 6fcbf4f212..298f3d0a8b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -651,7 +651,6 @@ tgsi_exec_machine_bind_shader( if (numSamplers) { assert(samplers); - assert(samplers[0]); } mach->Tokens = tokens; -- cgit v1.2.3 From dd406cf34196a5a60362d8e1928b1308b56dd3f8 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 30 Jul 2010 15:42:06 -0400 Subject: draw: actually a noop, rather than not implemented we just have nothing to do in it right now --- src/gallium/auxiliary/draw/draw_gs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index a36321d910..cff859a42b 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -75,7 +75,10 @@ draw_gs_set_constants(struct draw_context *draw, const void *constants, unsigned size) { - debug_printf("draw_gs_set_constants() not implemented yet!\n"); + /* noop. added here for symmetry with the VS + * code and in case we'll ever want to allign + * the constants, e.g. when we'll change to a + * different interpreter */ } -- cgit v1.2.3 From aef4500ca4ae9e5bc57560c6f9a32d9ad892975b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 08:57:14 +1000 Subject: draw: fix warning in sse code. Not sure if this will actually fix the issue, but it fixes the warning. --- src/gallium/auxiliary/draw/draw_vs_sse.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 14c95082a9..0b0c6077c6 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -84,6 +84,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base, const float (*input)[4], float (*output)[4], const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) -- cgit v1.2.3 From c1f33097f4a6cd33df57dc601ba1733985979a4f Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 30 Jul 2010 16:59:05 +0200 Subject: util: Fix unpacking of R8G8Bx_SNORM format. Apparently, we must always use integers to perform calculations, otherwise the results won't match D3D's CxV8U8 definition. --- src/gallium/auxiliary/util/u_format_other.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_format_other.c b/src/gallium/auxiliary/util/u_format_other.c index 723fa8c3bf..fa42ec3713 100644 --- a/src/gallium/auxiliary/util/u_format_other.c +++ b/src/gallium/auxiliary/util/u_format_other.c @@ -121,6 +121,15 @@ util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, * A.k.a. D3DFMT_CxV8U8 */ +static uint8_t +r8g8bx_derive(int16_t r, int16_t g) +{ + /* Derive blue from red and green components. + * Apparently, we must always use integers to perform calculations, + * otherwise the results won't match D3D's CxV8U8 definition. + */ + return (uint8_t)sqrtf(0x7f * 0x7f - r * r - g * g) * 0xff / 0x7f; +} void util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, @@ -145,7 +154,7 @@ util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, dst[0] = (float)(r * (1.0f/0x7f)); /* r */ dst[1] = (float)(g * (1.0f/0x7f)); /* g */ - dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */ + dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ dst[3] = 1.0f; /* a */ dst += 4; } @@ -177,7 +186,7 @@ util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_strid dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */ dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */ - dst[2] = (uint8_t)sqrtf(0x7f*0x7f - r * r - g * g) * 0xff / 0x7f; /* b */ + dst[2] = r8g8bx_derive(r, g); /* b */ dst[3] = 255; /* a */ dst += 4; } @@ -262,6 +271,6 @@ util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src, dst[0] = r * (1.0f/0x7f); /* r */ dst[1] = g * (1.0f/0x7f); /* g */ - dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */ + dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ dst[3] = 1.0f; /* a */ } -- cgit v1.2.3 From 1fd84b10f269a32db66254aa567b1aad8b152fe8 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 3 Aug 2010 15:56:17 -0400 Subject: gallium/util: add extra primitives to the trimmer --- src/gallium/auxiliary/util/u_prim.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 606b9b5c6b..63ddc86475 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -108,6 +108,19 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) ok = (*nr >= 4); *nr -= (*nr % 2); break; + case PIPE_PRIM_LINES_ADJACENCY: + ok = (*nr >= 4); + *nr -= (*nr % 4); + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + ok = (*nr >= 6); + *nr -= (*nr % 5); + break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + ok = (*nr >= 4); + break; default: ok = 0; break; -- cgit v1.2.3 From 6f9d3516a4f5b8abc9e8510dbccd93bf7874e6da Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 3 Aug 2010 16:02:09 -0400 Subject: gallium/draw: forgot about PIPE_PRIM_LINE_STRIP_ADJACENCY --- src/gallium/auxiliary/util/u_prim.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 63ddc86475..3c851f7340 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -113,6 +113,7 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) *nr -= (*nr % 4); break; case PIPE_PRIM_LINE_STRIP_ADJACENCY: + ok = (*nr >= 4); break; case PIPE_PRIM_TRIANGLES_ADJACENCY: ok = (*nr >= 6); -- cgit v1.2.3 From f94e7e25d101a8691a5276a5aa0eb97d2f4eebbc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 14:12:47 -0600 Subject: draw: add assertion, rearrange debug code --- src/gallium/auxiliary/draw/draw_pt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index adef26a167..0a41328642 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -342,19 +342,22 @@ draw_arrays_instanced(struct draw_context *draw, unsigned reduced_prim = u_reduced_prim(mode); unsigned instance; + assert(instanceCount > 0); + if (reduced_prim != draw->reduced_prim) { draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); draw->reduced_prim = reduced_prim; } if (0) - draw_print_arrays(draw, mode, start, MIN2(count, 20)); - - if (0) { - unsigned int i; debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", mode, start, count); + + if (0) tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); + + if (0) { + unsigned int i; debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { debug_printf(" %u: src_offset=%u inst_div=%u vbuf=%u format=%s\n", @@ -375,6 +378,9 @@ draw_arrays_instanced(struct draw_context *draw, } } + if (0) + draw_print_arrays(draw, mode, start, MIN2(count, 20)); + for (instance = 0; instance < instanceCount; instance++) { draw->instance_id = instance + startInstance; draw_pt_arrays(draw, mode, start, count); -- cgit v1.2.3 From 0e7b53c75be6051b2a935e65f4dbc02449714ee1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 14:13:13 -0600 Subject: draw: use instance divisor in draw_print_arrays() --- src/gallium/auxiliary/draw/draw_pt.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 0a41328642..248927505d 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -259,6 +259,11 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) for (j = 0; j < draw->pt.nr_vertex_elements; j++) { uint buf = draw->pt.vertex_element[j].vertex_buffer_index; ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; + + if (draw->pt.vertex_element[j].instance_divisor) { + ii = draw->instance_id / draw->pt.vertex_element[j].instance_divisor; + } + ptr += draw->pt.vertex_buffer[buf].buffer_offset; ptr += draw->pt.vertex_buffer[buf].stride * ii; ptr += draw->pt.vertex_element[j].src_offset; -- cgit v1.2.3 From 8c3cc83432bbbc190b0f71f87e3e9583ae11f0b2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 16:21:57 -0600 Subject: draw: added a comment --- src/gallium/auxiliary/draw/draw_pt_emit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 0229bcc7fe..5568fbb9f8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -182,6 +182,7 @@ void draw_pt_emit( struct pt_emit *emit, 0, ~0); + /* fetch/translate vertex attribs to fill hw_verts[] */ translate->run( translate, 0, vertex_count, -- cgit v1.2.3 From e89e47e6d03f0c8b421268481d077100c2247253 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 16:30:40 -0600 Subject: gallium/translate: make generic_run() and generic_run_elts() more alike Plus more debug code and do clamping in generic_run(). --- .../auxiliary/translate/translate_generic.c | 63 +++++++++++++++------- 1 file changed, 44 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 0e43a512ee..4d1977229e 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -368,23 +368,23 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, /* loop over vertex attributes (vertex shader inputs) */ for (i = 0; i < count; i++) { - unsigned elt = *elts++; + const unsigned elt = *elts++; for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const uint8_t *src; - unsigned index; - - char *dst = (vert + - tg->attrib[attr].output_offset); + char *dst = vert + tg->attrib[attr].output_offset; if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const uint8_t *src; + unsigned index; + if (tg->attrib[attr].instance_divisor) { index = instance_id / tg->attrib[attr].instance_divisor; } else { index = elt; } + /* clamp to void going out of bounds */ index = MIN2(index, tg->attrib[attr].max_index); src = tg->attrib[attr].input_ptr + @@ -392,11 +392,23 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, tg->attrib[attr].fetch( data, src, 0, 0 ); + if (0) + debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: " + " %f, %f, %f, %f \n", + attr, + tg->attrib[attr].input_ptr, + tg->attrib[attr].input_stride, + tg->attrib[attr].instance_divisor, + tg->attrib[attr].max_index, + index, + data[0], data[1],data[2], data[3]); } else { data[0] = (float)instance_id; } - if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", - i, elt, attr, data[0], data[1], data[2], data[3]); + + if (0) + debug_printf("vert %d/%d attr %d: %f %f %f %f\n", + i, elt, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } @@ -425,29 +437,42 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - char *dst = (vert + - tg->attrib[attr].output_offset); + char *dst = vert + tg->attrib[attr].output_offset; if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { const uint8_t *src; + unsigned index; if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); - } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; + index = instance_id / tg->attrib[attr].instance_divisor; } + else { + index = elt; + } + + /* clamp to void going out of bounds */ + index = MIN2(index, tg->attrib[attr].max_index); + + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * index; tg->attrib[attr].fetch( data, src, 0, 0 ); + + if (0) + debug_printf("Fetch linear attr %d from %p stride %d index %d: " + " %f, %f, %f, %f \n", + attr, + tg->attrib[attr].input_ptr, + tg->attrib[attr].input_stride, + index, + data[0], data[1],data[2], data[3]); } else { data[0] = (float)instance_id; } - if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", - i, attr, data[0], data[1], data[2], data[3]); + if (0) + debug_printf("vert %d attr %d: %f %f %f %f\n", + i, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } -- cgit v1.2.3 From 48268e0f2a5e65b63586398db3a58523a8c7a7a0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 16:32:32 -0600 Subject: draw: check for instance divisors in vcache_check_run() When we have instance divisors we don't really know which vertex elements we'll be fetching ahead of time. This fixes a bug in instanced drawing which was exposed by the new draw_vbo() code because of max_index not being ~0 as often as it used to be. The test for max_index >= DRAW_PIPE_MAX_VERTICES often hid this problem before. --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 8ef94c3163..d2fa1c6d4e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -339,6 +339,25 @@ format_from_get_elt( pt_elt_func get_elt ) #endif +/** + * Check if any vertex attributes use instance divisors. + * Note that instance divisors complicate vertex fetching so we need + * to take the vcache path when they're in use. + */ +static boolean +any_instance_divisors(const struct draw_context *draw) +{ + uint i; + + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + uint div = draw->pt.vertex_element[i].instance_divisor; + if (div) + return TRUE; + } + return FALSE; +} + + static INLINE void vcache_check_run( struct draw_pt_front_end *frontend, pt_elt_func get_elt, @@ -382,6 +401,9 @@ vcache_check_run( struct draw_pt_front_end *frontend, if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES) goto fail; + if (any_instance_divisors(draw)) + goto fail; + fetch_count = max_index + 1 - min_index; if (0) -- cgit v1.2.3 From 5c2f01bbb076af8b8ae6e1803d95a9ae678c2d1c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 4 Aug 2010 14:00:04 +0800 Subject: draw: Fix the edge flags of flatshade_first polygons. This bug can be triggered by rendering polygons with glProvokingVertexEXT(GL_FIRST_VERTEX_CONVENTION_EXT); glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); --- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index dac68ad439..a42162691b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -146,10 +146,10 @@ static void FUNC( struct draw_pt_front_end *frontend, if (flatfirst) { flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_1 | - DRAW_PIPE_EDGE_FLAG_2); - edge_next = DRAW_PIPE_EDGE_FLAG_2; - edge_finish = DRAW_PIPE_EDGE_FLAG_0; + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1); + edge_next = DRAW_PIPE_EDGE_FLAG_1; + edge_finish = DRAW_PIPE_EDGE_FLAG_2; } else { flags = (DRAW_PIPE_RESET_STIPPLE | -- cgit v1.2.3 From 988e86762f07eb8b685eef6f3302566f604db2a6 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 4 Aug 2010 12:55:21 -0700 Subject: draw: Fix return type of draw_translate_vinfo_size. Fixes typo from commit b609cfc7c9c38f26e7e6d6f7dd5dd6d38f4ed209. --- src/gallium/auxiliary/draw/draw_vertex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 3af31ffe12..e63cf5f4f9 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -166,7 +166,7 @@ static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit } } -static INLINE enum attrib_emit draw_translate_vinfo_size(enum attrib_emit emit) +static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit) { switch (emit) { case EMIT_OMIT: -- cgit v1.2.3 From 6eb2a7fbafd49e75b6cbbee57f23dda63eff73ef Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 7 Jul 2010 15:20:19 +0200 Subject: r300g: implement hyper-z support. (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements fast Z clear, Z compression, and HiZ support for r300->r500 GPUs. It also allows cbzb clears when fast Z clears are being used for the ZB. It requires a kernel with hyper-z support. Thanks to Marek Olšák , who started this off, and Alex Deucher at AMD for providing lots of hints. v2: squashed zmask ram size fix] squashed r300g/blitter: fix Z readback when compressed] v3: rebase around texture changes in master - .1 fix more bits v4: migrated to using u_mm in r300_texture to manage hiz/zmask rams consistently disabled HiZ when using OQ flush z-cache before turning hyper-z off update hyper-z state on dsa state change store depthclearvalue across cbzb clears and replace it afterwards. Signed-off-by: Dave Airlie --- src/gallium/auxiliary/util/u_blitter.c | 44 +++++ src/gallium/auxiliary/util/u_blitter.h | 2 + src/gallium/drivers/r300/r300_blit.c | 69 +++++++- src/gallium/drivers/r300/r300_chipset.c | 54 +++++- src/gallium/drivers/r300/r300_chipset.h | 14 +- src/gallium/drivers/r300/r300_context.c | 38 ++++- src/gallium/drivers/r300/r300_context.h | 31 +++- src/gallium/drivers/r300/r300_debug.c | 1 + src/gallium/drivers/r300/r300_emit.c | 142 ++++++++++++++- src/gallium/drivers/r300/r300_emit.h | 3 + src/gallium/drivers/r300/r300_flush.c | 3 +- src/gallium/drivers/r300/r300_hyperz.c | 237 +++++++++++++++++++++++++- src/gallium/drivers/r300/r300_hyperz.h | 5 + src/gallium/drivers/r300/r300_reg.h | 1 + src/gallium/drivers/r300/r300_render.c | 3 +- src/gallium/drivers/r300/r300_screen.h | 1 + src/gallium/drivers/r300/r300_state.c | 71 ++++++-- src/gallium/drivers/r300/r300_state_derived.c | 4 +- src/gallium/drivers/r300/r300_texture.c | 9 + src/gallium/drivers/r300/r300_winsys.h | 1 + src/gallium/winsys/radeon/drm/radeon_drm.c | 10 ++ src/gallium/winsys/radeon/drm/radeon_r300.c | 2 + src/gallium/winsys/radeon/drm/radeon_winsys.h | 3 + 23 files changed, 701 insertions(+), 47 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 0d94aaae95..b5b86b7214 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,6 +87,7 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; + void *dsa_flush_depth_stencil; void *velem_state; @@ -156,6 +157,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + dsa.depth.writemask = 1; + ctx->dsa_flush_depth_stencil = + pipe->create_depth_stencil_alpha_state(pipe, &dsa); + dsa.depth.enabled = 1; dsa.depth.writemask = 1; dsa.depth.func = PIPE_FUNC_ALWAYS; @@ -940,3 +945,42 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } + +/* Clear a region of a depth stencil surface. */ +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->base.pipe; + struct pipe_framebuffer_state fb_state; + + assert(dstsurf->texture); + if (!dstsurf->texture) + return; + + /* check the saved state */ + blitter_check_saved_CSOs(ctx); + assert(blitter->saved_fb_state.nr_cbufs != ~0); + + /* bind CSOs */ + pipe->bind_blend_state(pipe, ctx->blend_keep_color); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); + + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); + pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); + + /* set a framebuffer state */ + fb_state.width = dstsurf->width; + fb_state.height = dstsurf->height; + fb_state.nr_cbufs = 0; + fb_state.cbufs[0] = 0; + fb_state.zsbuf = dstsurf; + pipe->set_framebuffer_state(pipe, &fb_state); + + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, + UTIL_BLITTER_ATTRIB_NONE, NULL); + blitter_restore_CSOs(ctx); +} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index ba3f92eca8..f316587dea 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,6 +200,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index d125196b6d..6f8d9abfc8 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,6 +22,7 @@ #include "r300_context.h" #include "r300_texture.h" +#include "r300_winsys.h" #include "util/u_format.h" #include "util/u_pack_color.h" @@ -81,7 +82,7 @@ static void r300_blitter_end(struct r300_context *r300) } static uint32_t r300_depth_clear_cb_value(enum pipe_format format, - const float* rgba) + const float* rgba) { union util_color uc; util_pack_color(rgba, format, &uc); @@ -98,6 +99,9 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + if (r300->z_fastfill) + clear_buffers &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); + /* Only color clear allowed, and only one colorbuffer. */ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) return FALSE; @@ -105,6 +109,23 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, return r300_surface(fb->cbufs[0])->cbzb_allowed; } +static uint32_t r300_depth_clear_value(enum pipe_format format, + double depth, unsigned stencil) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return util_pack_z(format, depth); + + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return util_pack_z_stencil(format, depth, stencil); + + default: + assert(0); + return 0; + } +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -154,6 +175,22 @@ static void r300_clear(struct pipe_context* pipe, (struct r300_hyperz_state*)r300->hyperz_state.state; uint32_t width = fb->width; uint32_t height = fb->height; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + uint32_t hyperz_dcv = 0; + + /* Enable fast Z clear. + * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ + if ((buffers & (PIPE_CLEAR_DEPTH|PIPE_CLEAR_STENCIL)) && has_hyperz) { + + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + + r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); + if (r300->z_compression || r300->z_fastfill) + r300->zmask_clear.dirty = TRUE; + if (r300->hiz_enable) + r300->hiz_clear.dirty = TRUE; + } /* Enable CBZB clear. */ if (r300_cbzb_clear_allowed(r300, buffers)) { @@ -181,6 +218,7 @@ static void r300_clear(struct pipe_context* pipe, /* Disable CBZB clear. */ if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; + hyperz->zb_depthclearvalue = hyperz_dcv; r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); } @@ -221,6 +259,29 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, r300_blitter_end(r300); } +/* Clear a region of a depth stencil surface. */ +static void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst) +{ + struct r300_context *r300 = r300_context(pipe); + struct pipe_surface *dstsurf; + struct r300_texture *tex = r300_texture(dst); + + /* only flush the zmask if we have one attached to this texture */ + if (!tex->zmask_mem[subdst.level]) + return; + + dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, + subdst.face, subdst.level, 0, + PIPE_BIND_DEPTH_STENCIL); + r300->z_decomp_rd = TRUE; + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_end(r300); + r300->z_decomp_rd = FALSE; +} + /* Copy a block of pixels from one surface to another using HW. */ static void r300_hw_copy_region(struct pipe_context* pipe, struct pipe_resource *dst, @@ -252,7 +313,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, { enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; - + boolean is_depth; if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, @@ -279,6 +340,10 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) { + r300_flush_depth_stencil(pipe, src, subsrc); + } if (old_format != new_format) { dst->format = new_format; src->format = new_format; diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 21f3b9d261..2df25f9c8e 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -36,7 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 2; caps->num_tex_units = 16; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; - caps->has_hiz = TRUE; + caps->hiz_ram = 0; caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -49,6 +49,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4145: @@ -61,6 +63,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4150: @@ -77,8 +81,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4E54: case 0x4E56: caps->family = CHIP_FAMILY_RV350; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4148: @@ -91,12 +95,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R350; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4E4A: caps->family = CHIP_FAMILY_R360; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5460: @@ -108,8 +116,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5B64: case 0x5B65: caps->family = CHIP_FAMILY_RV370; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x3150: @@ -120,6 +128,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x3E54: caps->family = CHIP_FAMILY_RV380; caps->high_second_pipe = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4A48: @@ -135,6 +145,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5548: @@ -149,6 +161,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x554C: @@ -161,6 +175,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5D4C: @@ -172,6 +188,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4B48: @@ -182,6 +200,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5E4C: @@ -199,34 +219,36 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5954: case 0x5955: caps->family = CHIP_FAMILY_RS480; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x791E: @@ -234,6 +256,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x793F: @@ -242,6 +266,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x796C: @@ -251,6 +277,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7100: @@ -270,6 +298,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R520; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7140: @@ -313,6 +343,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV515; caps->num_vert_fpus = 2; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x71C0: @@ -334,6 +366,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7240: @@ -354,12 +388,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R580; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7280: caps->family = CHIP_FAMILY_RV570; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7281: @@ -376,6 +414,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV560; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; default: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 65750f54e7..e7ca642b4f 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -25,6 +25,14 @@ #include "pipe/p_compiler.h" +/* these are sizes in dwords */ +#define R300_HIZ_LIMIT 10240 +#define RV530_HIZ_LIMIT 15360 + +/* rv3xx have only one pipe */ +#define PIPE_ZMASK_SIZE 4096 +#define RV3xx_ZMASK_SIZE 5120 + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -42,8 +50,10 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM. */ - boolean has_hiz; + /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + int hiz_ram; + /* some chipsets have zmask ram per pipe some don't */ + int zmask_ram; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index df90359058..0668fbc151 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" +#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" @@ -114,6 +115,10 @@ static void r300_destroy_context(struct pipe_context* context) u_upload_destroy(r300->upload_vb); u_upload_destroy(r300->upload_ib); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_destroy_mm(r300); + translate_cache_destroy(r300->tran.translate_cache); r300_release_referenced_objects(r300); @@ -166,6 +171,8 @@ static void r300_setup_atoms(struct r300_context* r300) boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; /* Create the actual atom list. * @@ -188,8 +195,9 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); + if (has_hyperz) + R300_INIT_ATOM(hyperz_state, is_rv350 ? 10 : 8); /* ZB (unpipelined), SC. */ - R300_INIT_ATOM(hyperz_state, 6); R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); @@ -220,6 +228,13 @@ static void r300_setup_atoms(struct r300_context* r300) /* TX. */ R300_INIT_ATOM(texture_cache_inval, 2); R300_INIT_ATOM(textures_state, 0); + if (has_hyperz) { + /* HiZ Clear */ + if (has_hiz_ram) + R300_INIT_ATOM(hiz_clear, 0); + /* zmask clear */ + R300_INIT_ATOM(zmask_clear, 0); + } /* ZB (unpipelined), SU. */ R300_INIT_ATOM(query_start, 4); @@ -236,7 +251,8 @@ static void r300_setup_atoms(struct r300_context* r300) r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); - r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); + if (has_hyperz) + r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state); r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); @@ -282,8 +298,7 @@ static void r300_init_states(struct pipe_context *pipe) (struct r300_vap_invariant_state*)r300->vap_invariant_state.state; struct r300_invariant_state *invariant = (struct r300_invariant_state*)r300->invariant_state.state; - struct r300_hyperz_state *hyperz = - (struct r300_hyperz_state*)r300->hyperz_state.state; + CB_LOCALS; pipe->set_blend_color(pipe, &bc); @@ -350,11 +365,20 @@ static void r300_init_states(struct pipe_context *pipe) } /* Initialize the hyperz state. */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) { - BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size); + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + BEGIN_CB(&hyperz->cb_flush_begin, r300->hyperz_state.size); + OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); OUT_CB_REG(R300_ZB_BW_CNTL, 0); OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); + + if (r300->screen->caps.is_rv350) { + OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); + } END_CB; } } @@ -415,6 +439,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, rws->cs_set_flush(r300->cs, r300_flush_cb, r300); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_init_mm(r300); + r300->upload_ib = u_upload_create(&r300->context, 32 * 1024, 16, PIPE_BIND_INDEX_BUFFER); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7c77a46016..d86a5c8fc9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -106,13 +106,19 @@ struct r300_dsa_state { }; struct r300_hyperz_state { + int current_func; /* -1 after a clear before first op */ + int flush; /* This is actually a command buffer with named dwords. */ + uint32_t cb_flush_begin; + uint32_t zb_zcache_ctlstat; /* R300_ZB_CACHE_CNTL */ uint32_t cb_begin; uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */ uint32_t cb_reg1; uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */ uint32_t cb_reg2; uint32_t sc_hyperz; /* R300_SC_HYPERZ */ + uint32_t cb_reg3; + uint32_t gb_z_peq_config; /* R300_GB_Z_PEQ_CONFIG: 0x4028 */ }; struct r300_gpu_flush { @@ -321,6 +327,7 @@ struct r300_surface { /* Whether the CBZB clear is allowed on the surface. */ boolean cbzb_allowed; + }; struct r300_texture_desc { @@ -387,6 +394,10 @@ struct r300_texture { /* All bits should be filled in. */ struct r300_texture_fb_state fb_state; + /* hyper-z memory allocs */ + struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; + struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; + /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; @@ -512,6 +523,10 @@ struct r300_context { struct r300_atom texture_cache_inval; /* GPU flush. */ struct r300_atom gpu_flush; + /* HiZ clear */ + struct r300_atom hiz_clear; + /* zmask clear */ + struct r300_atom zmask_clear; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -549,8 +564,19 @@ struct r300_context { boolean two_sided_color; /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; - + /* Whether fast zclear is enabled. */ + boolean z_fastfill; +#define R300_Z_COMPRESS_44 1 +#define RV350_Z_COMPRESS_88 2 + int z_compression; + boolean hiz_enable; boolean cbzb_clear; + boolean z_decomp_rd; + + /* two mem block managers for hiz/zmask ram space */ + struct mem_block *hiz_mm; + struct mem_block *zmask_mm; + /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; @@ -621,7 +647,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG + R300_CHANGED_CBZB_FLAG, + R300_CHANGED_ZCLEAR_FLAG }; void r300_mark_fb_state_dirty(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 053a64ea6d..c3e157e99a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -44,6 +44,7 @@ static const struct debug_named_value debug_options[] = { { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, { "stats", DBG_STATS, "Gather statistics" }, + { "hyperz", DBG_HYPERZ, "HyperZ (for debugging)" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 36a26a7871..17e180a79a 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_simple_list.h" #include "r300_context.h" @@ -329,6 +330,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); CS_LOCALS(r300); BEGIN_CS(size); @@ -364,6 +366,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + + DBG(r300, DBG_CBZB, + "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, + surf->cbzb_pitch); } /* Set up a zbuffer. */ else if (fb->zsbuf) { @@ -377,15 +383,32 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); - /* HiZ RAM. */ - if (r300->screen->caps.has_hiz) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + if (has_hyperz) { + uint32_t surf_pitch; + struct r300_texture *tex; + int level = surf->base.level; + tex = r300_texture(surf->base.texture); + + surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ + if (r300->screen->caps.hiz_ram) { + if (tex->hiz_mem[level]) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + } + /* Z Mask RAM. (compressed zbuffer) */ + if (tex->zmask_mem[level]) { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); + } } - - /* Z Mask RAM. (compressed zbuffer) */ - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); } END_CS; @@ -394,8 +417,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_hyperz_state(struct r300_context *r300, unsigned size, void *state) { + struct r300_hyperz_state *z = state; CS_LOCALS(r300); - WRITE_CS_TABLE(state, size); + if (z->flush) + WRITE_CS_TABLE(&z->cb_flush_begin, size); + else + WRITE_CS_TABLE(&z->cb_begin, size - 2); } void r300_emit_hyperz_end(struct r300_context *r300) @@ -403,9 +430,11 @@ void r300_emit_hyperz_end(struct r300_context *r300) struct r300_hyperz_state z = *(struct r300_hyperz_state*)r300->hyperz_state.state; + z.flush = 1; z.zb_bw_cntl = 0; z.zb_depthclearvalue = 0; z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z.gb_z_peq_config = 0; r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); } @@ -943,6 +972,101 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0, height, offset_shift; + struct r300_texture* tex; + int i; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + /* convert from pixels to 4x4 blocks */ + stride = ALIGN_DIVUP(stride, 4); + + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* there are 4 blocks per dwords */ + stride = ALIGN_DIVUP(stride, 4); + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + + offset_shift = 2; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); + } + z->current_func = -1; +} + +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0; + struct r300_texture* tex; + uint32_t i, height; + int mult, offset_shift; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + offset_shift = 4; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + + /* okay have width in pixels - divide by block width */ + stride = ALIGN_DIVUP(stride, mult); + /* have width in blocks - divide by number of fragment pipes screen width */ + /* 16 blocks per dword */ + stride = ALIGN_DIVUP(stride, 16); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); + } +} + void r300_emit_ztop_state(struct r300_context* r300, unsigned size, void* state) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 5d05039669..2f2c2f2dcb 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -112,6 +112,9 @@ void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, voi void r300_emit_invariant_state(struct r300_context *r300, unsigned size, void *state); +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state); +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state); + unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index fe182b6615..7fed9b5d07 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -44,7 +44,8 @@ static void r300_flush(struct pipe_context* pipe, u_upload_flush(r300->upload_ib); if (r300->dirty_hw) { - r300_emit_hyperz_end(r300); + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e952895601..e719342a46 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -21,25 +21,158 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_format.h" +#include "util/u_mm.h" #include "r300_context.h" #include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" +#include "r300_emit.h" +#include "r300_texture.h" +/* + HiZ rules - taken from various docs + 1. HiZ only works on depth values + 2. Cannot HiZ if stencil fail or zfail is !KEEP + 3. on R300/400, HiZ is disabled if depth test is EQUAL + 4. comparison changes without clears usually mean disabling HiZ +*/ /*****************************************************************************/ /* The HyperZ setup */ /*****************************************************************************/ +static bool r300_get_sc_hz_max(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_SC_HYPERZ_MIN; + + if (func >= 4 && func <= 7) + ret = R300_SC_HYPERZ_MAX; + return ret; +} + +static bool r300_zfunc_same_direction(int func1, int func2) +{ + /* func1 is less/lessthan */ + if (func1 == 1 || func1 == 2) + if (func2 == 3 || func2 == 4 || func2 == 5) + return FALSE; + + if (func2 == 1 || func2 == 2) + if (func1 == 4 || func1 == 5) + return FALSE; + return TRUE; +} + +static int r300_get_hiz_min(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_HIZ_MIN; + + if (func == 1 || func == 2) + ret = R300_HIZ_MAX; + return ret; +} + +static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) +{ + if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP)) + return TRUE; + return FALSE; +} + +static boolean r300_can_hiz(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; + struct r300_screen* r300screen = r300->screen; + struct r300_hyperz_state *z = r300->hyperz_state.state; + + /* shader writes depth - no HiZ */ + if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ + return FALSE; + + if (r300->query_current) + return FALSE; + /* if stencil fail/zfail op is not KEEP */ + if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || + r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + return FALSE; + + if (dsa->depth.enabled) { + /* if depth func is EQUAL pre-r500 */ + if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + return FALSE; + /* if depth func is NOTEQUAL */ + if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + return FALSE; + } + /* depth comparison function - if just cleared save and return okay */ + if (z->current_func == -1) { + int func = dsa_state->z_stencil_control & 0x7; + if (func != 0 && func != 7) + z->current_func = dsa_state->z_stencil_control & 0x7; + } else { + /* simple don't change */ + if (!r300_zfunc_same_direction(z->current_func, (dsa_state->z_stencil_control & 0x7))) { + DBG(r300, DBG_HYPERZ, "z func changed direction - disabling hyper-z %d -> %d\n", z->current_func, dsa_state->z_stencil_control); + return FALSE; + } + } + return TRUE; +} + static void r300_update_hyperz(struct r300_context* r300) { struct r300_hyperz_state *z = (struct r300_hyperz_state*)r300->hyperz_state.state; + z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z->flush = 0; - if (r300->cbzb_clear) + if (r300->cbzb_clear) { z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; + return; + } + + /* Zbuffer compression. */ + if (r300->z_compression) { + z->zb_bw_cntl |= R300_RD_COMP_ENABLE; + if (r300->z_decomp_rd == false) + z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + /* RV350 and up optimizations. */ + if (r300->z_compression == RV350_Z_COMPRESS_88) + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + } + + /* Z fastfill. */ + if (r300->z_fastfill) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ + } + + if (r300->hiz_enable) { + bool can_hiz = r300_can_hiz(r300); + if (can_hiz) { + z->zb_bw_cntl |= R300_HIZ_ENABLE; + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; + z->sc_hyperz |= r300_get_sc_hz_max(r300); + z->zb_bw_cntl |= r300_get_hiz_min(r300); + } + } + + if (r300->screen->caps.is_r500) { + /* XXX Are these bits really available on RV350? */ + z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; + z->zb_bw_cntl |= + R500_HIZ_EQUAL_REJECT_ENABLE | + R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; + } } /*****************************************************************************/ @@ -126,15 +259,115 @@ static void r300_update_ztop(struct r300_context* r300) } else { ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } - if (ztop_state->z_buffer_top != old_ztop) r300->ztop_state.dirty = TRUE; } +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +static void r300_update_hiz_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + r300->hiz_clear.size = height * 4; +} + +static void r300_update_zmask_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + int mult; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + r300->zmask_clear.size = height * 4; +} + void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } + + if (r300->hiz_clear.dirty) { + r300_update_hiz_clear(r300); + } + if (r300->zmask_clear.dirty) { + r300_update_zmask_clear(r300); + } +} + +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) +{ + struct r300_texture *tex; + uint32_t zsize, ndw; + int level = surf->base.level; + + tex = r300_texture(surf->base.texture); + + if (tex->hiz_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + ndw = ALIGN_DIVUP(zsize, 64); + + tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); + return; +} + +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress) +{ + int bsize = 256; + uint32_t zsize, ndw; + int level = surf->base.level; + struct r300_texture *tex; + + tex = r300_texture(surf->base.texture); + + if (tex->zmask_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + + /* each zmask dword represents 16 4x4 blocks - which is 256 pixels + or 16 8x8 depending on the gb peq flag = 1024 pixels */ + if (compress == RV350_Z_COMPRESS_88) + bsize = 1024; + + ndw = ALIGN_DIVUP(zsize, bsize); + tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); + return; +} + +void r300_hyperz_init_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + int frag_pipes = r300screen->caps.num_frag_pipes; + + if (r300screen->caps.hiz_ram) + r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); + + r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); +} + +void r300_hyperz_destroy_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + + if (r300screen->caps.hiz_ram) + u_mmDestroy(r300->hiz_mm); + + u_mmDestroy(r300->zmask_mm); } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h index 3df5053b89..09e1ff6625 100644 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -27,4 +27,9 @@ struct r300_context; void r300_update_hyperz_state(struct r300_context* r300); +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); + +void r300_hyperz_init_mm(struct r300_context *r300); +void r300_hyperz_destroy_mm(struct r300_context *r300); #endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 2acc1a903e..99a9d65055 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -3436,6 +3436,7 @@ enum { # define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) # define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24) +#define R300_PACKET3_3D_CLEAR_ZMASK 0x00003200 #define R300_PACKET3_INDX_BUFFER 0x00003300 # define R300_INDX_BUFFER_DST_SHIFT 0 # define R300_INDX_BUFFER_SKIP_SHIFT 16 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 7c4294bc9f..910f5f7113 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -223,7 +223,8 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. */ end_dwords += 26; /* emit_query_end */ - end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ cs_dwords += end_dwords; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 18745b83a0..13a3320b99 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -91,6 +91,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FB (1 << 9) #define DBG_RS_BLOCK (1 << 10) #define DBG_CBZB (1 << 11) +#define DBG_HYPERZ (1 << 12) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index a3383c3878..374aa254f0 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -25,6 +25,7 @@ #include "util/u_blitter.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" @@ -43,6 +44,7 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" +#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -472,14 +474,14 @@ static void* dsa->dsa = *state; - /* Depth test setup. */ + /* Depth test setup. - separate write mask depth for decomp flush */ + if (state->depth.writemask) { + dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; + } + if (state->depth.enabled) { dsa->z_buffer_control |= R300_Z_ENABLE; - if (state->depth.writemask) { - dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; - } - dsa->z_stencil_control |= (r300_translate_depth_stencil_function(state->depth.func) << R300_Z_FUNC_SHIFT); @@ -592,6 +594,7 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, UPDATE_STATE(state, r300->dsa_state); + r300->hyperz_state.dirty = TRUE; /* Will be updated before the emission. */ r300_dsa_inject_stencilref(r300); } @@ -685,7 +688,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300->gpu_flush.dirty = TRUE; r300->fb_state.dirty = TRUE; - r300->hyperz_state.dirty = TRUE; + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300->hyperz_state.dirty = TRUE; if (change == R300_CHANGED_FB_STATE) { r300->aa_state.dirty = TRUE; @@ -698,7 +702,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, if (r300->cbzb_clear) r300->fb_state.size += 10; else if (state->zsbuf) - r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14; + r300->fb_state.size += r300->screen->caps.hiz_ram ? 18 : 14; /* The size of the rest of atoms stays the same. */ } @@ -710,8 +714,10 @@ static void struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; + int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -743,17 +749,52 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - /* Polygon offset depends on the zbuffer bit depth. */ + r300->hiz_enable = false; + r300->z_fastfill = false; + r300->z_compression = false; + if (state->zsbuf) { - switch (util_format_get_blocksize(state->zsbuf->texture->format)) { - case 2: - zbuffer_bpp = 16; - break; - case 4: - zbuffer_bpp = 24; - break; + blocksize = util_format_get_blocksize(state->zsbuf->texture->format); + switch (blocksize) { + case 2: + zbuffer_bpp = 16; + break; + case 4: + zbuffer_bpp = 24; + break; } + if (has_hyperz) { + struct r300_surface *zs_surf = r300_surface(state->zsbuf); + struct r300_texture *tex; + int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; + int level = zs_surf->base.level; + + tex = r300_texture(zs_surf->base.texture); + + /* work out whether we can support hiz on this buffer */ + r300_hiz_alloc_block(r300, zs_surf); + + /* work out whether we can support zmask features on this buffer */ + r300_zmask_alloc_block(r300, zs_surf, compress); + + if (tex->hiz_mem[level]) { + r300->hiz_enable = 1; + } + if (tex->zmask_mem[level]) { + r300->z_fastfill = 1; + /* compression causes hangs on 16-bit */ + if (zbuffer_bpp == 24) + r300->z_compression = compress; + } + DBG(r300, DBG_HYPERZ, + "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable, + tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, + r300->z_compression, r300->z_fastfill, + tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); + } + + /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 66f64f0f6a..f3dad4c292 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -35,6 +35,7 @@ #include "r300_state_inlines.h" #include "r300_texture.h" #include "r300_vs.h" +#include "r300_winsys.h" /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ @@ -693,5 +694,6 @@ void r300_update_derived_state(struct r300_context* r300) } } - r300_update_hyperz_state(r300); + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_update_hyperz_state(r300); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index fcdca5605e..da8eadd3b5 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -35,6 +35,7 @@ #include "util/u_format_s3tc.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_mm.h" #include "pipe/p_screen.h" @@ -645,8 +646,16 @@ static void r300_texture_destroy(struct pipe_screen *screen, { struct r300_texture* tex = (struct r300_texture*)texture; struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; + int i; rws->buffer_reference(rws, &tex->buffer, NULL); + for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { + if (tex->hiz_mem[i]) + u_mmFreeMem(tex->hiz_mem[i]); + if (tex->zmask_mem[i]) + u_mmFreeMem(tex->zmask_mem[i]); + } + FREE(tex); } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index ff11546a64..e7a1ede4fb 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -49,6 +49,7 @@ enum r300_value_id { R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, R300_VID_DRM_2_3_0, + R300_CAN_HYPERZ, }; enum r300_reference_domain { /* bitfield */ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index e9a276362f..e7057ca593 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -130,6 +130,16 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) } winsys->z_pipes = target; + winsys->hyperz = FALSE; +#ifndef RADEON_INFO_WANT_HYPERZ +#define RADEON_INFO_WANT_HYPERZ 7 +#endif + info.request = RADEON_INFO_WANT_HYPERZ; + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (!retval && target == 1) { + winsys->hyperz = TRUE; + } + retval = drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); if (retval) { diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 5544504067..955ae4c045 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -211,6 +211,8 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->squaretiling; case R300_VID_DRM_2_3_0: return ws->drm_2_3_0; + case R300_CAN_HYPERZ: + return ws->hyperz; } return 0; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 533b7b2e2d..52db0d62d2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -65,6 +65,9 @@ struct radeon_libdrm_winsys { */ boolean drm_2_3_0; + /* hyperz user */ + boolean hyperz; + /* DRM FD */ int fd; -- cgit v1.2.3 From 4bd061b127aedfa7f6cd2c9fb4763927588c7ad1 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:11:46 -0700 Subject: gallivm: Only get debug option once --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 69353dea09..ef0888079c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -45,6 +45,8 @@ static const struct debug_named_value lp_bld_debug_flags[] = { { "nopt", GALLIVM_DEBUG_NO_OPT, NULL }, DEBUG_NAMED_VALUE_END }; + +DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0); #endif @@ -89,7 +91,7 @@ void lp_build_init(void) { #ifdef DEBUG - gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 ); + gallivm_debug = debug_get_option_gallivm_debug(); #endif lp_set_target_options(); -- cgit v1.2.3 From 4d65055b1f2023f95a4f9c2bfab55a67ef72739d Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:21:49 -0700 Subject: util: Add option to not dump cpu caps --- src/gallium/auxiliary/util/u_cpu_detect.c | 39 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 879643463f..6f38d22285 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -73,6 +73,9 @@ #endif +DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE); + + struct util_cpu_caps util_cpu_caps; #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) @@ -500,23 +503,25 @@ util_cpu_detect(void) #endif /* PIPE_ARCH_PPC */ #ifdef DEBUG - debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch); - debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); - - debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); - debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); - - debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); - debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); - debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); - debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); - debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); - debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); - debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); - debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); - debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); - debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); - debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); + if (debug_get_option_dump_cpu()) { + debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch); + debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); + + debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); + debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); + + debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); + debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); + debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); + debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); + debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); + debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); + debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); + debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); + debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); + debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); + debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); + } #endif util_cpu_detect_initialized = TRUE; -- cgit v1.2.3 From 9f5c1194ff0ff69be5d7641d68169b152bc6cd0a Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:44:05 -0700 Subject: tgsi: Add option to stop the sanity checker from printing --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 13 +++++++++++++ src/gallium/auxiliary/tgsi/tgsi_sanity.h | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 97148dbe23..d0550110d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -33,6 +33,10 @@ #include "tgsi_info.h" #include "tgsi_iterate.h" + +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE); + + typedef struct { uint file : 28; /* max 2 dimensions */ @@ -54,6 +58,8 @@ struct sanity_check_ctx uint errors; uint warnings; uint implied_array_size; + + boolean print; }; static INLINE unsigned @@ -148,6 +154,9 @@ report_error( { va_list args; + if (!ctx->print) + return; + debug_printf( "Error : " ); va_start( args, format ); _debug_vprintf( format, args ); @@ -164,6 +173,9 @@ report_warning( { va_list args; + if (!ctx->print) + return; + debug_printf( "Warning: " ); va_start( args, format ); _debug_vprintf( format, args ); @@ -539,6 +551,7 @@ tgsi_sanity_check( ctx.errors = 0; ctx.warnings = 0; ctx.implied_array_size = 0; + ctx.print = debug_get_option_print_sanity(); if (!tgsi_iterate_shader( tokens, &ctx.iter )) return FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h index 52263ff883..46d8d18419 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -35,7 +35,8 @@ extern "C" { #endif /* Check the given token stream for errors and common mistakes. - * Diagnostic messages are printed out to the debug output. + * Diagnostic messages are printed out to the debug output, and is + * controlled by the debug option TGSI_PRINT_SANITY (default true). * Returns TRUE if there are no errors, even though there could be some warnings. */ boolean -- cgit v1.2.3 From 1a3a04d56b01714e4fa19aa7efcdae4b6644af46 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:52:39 -0700 Subject: tgsi: Fix typo, so we follow what is in the comments --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index d0550110d8..9e02040f6c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -34,7 +34,7 @@ #include "tgsi_iterate.h" -DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE); typedef struct { -- cgit v1.2.3 From 524b2626c2d018f330ae7423c858ef73ea0424b5 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sun, 1 Aug 2010 21:29:36 +0800 Subject: draw: Add draw_decompose_tmp.h. Including draw_decompose_tmp.h defines a primitive decomposer. It is intended to replace the existing vcache/so/gs/pipe decomposers. This is based on draw_pt_vcache_tmp.h. --- src/gallium/auxiliary/draw/draw_decompose_tmp.h | 425 ++++++++++++++++++++++++ 1 file changed, 425 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_decompose_tmp.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h new file mode 100644 index 0000000000..cb25202323 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h @@ -0,0 +1,425 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell + * Chia-I Wu + */ + +/* these macros are optional */ +#ifndef LOCAL_VARS +#define LOCAL_VARS do {} while (0) +#endif +#ifndef FUNC_ENTER +#define FUNC_ENTER do {} while (0) +#endif +#ifndef FUNC_EXIT +#define FUNC_EXIT do {} while (0) +#endif +#ifndef LINE_ADJ +#define LINE_ADJ(flags, a0, i0, i1, a1) LINE(flags, i0, i1) +#endif +#ifndef TRIANGLE_ADJ +#define TRIANGLE_ADJ(flags, i0, a0, i1, a1, i2, a2) TRIANGLE(flags, i0, i1, i2) +#endif + +static void +FUNC(FUNC_VARS) +{ + unsigned idx[6], i; + ushort flags; + LOCAL_VARS; + + FUNC_ENTER; + + /* prim, count, and last_vertex_last should have been defined */ + if (0) { + debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n", + __FUNCTION__, prim, count, last_vertex_last); + } + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) { + idx[0] = GET_ELT(i); + POINT(idx[0]); + } + break; + + case PIPE_PRIM_LINES: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 0; i + 1 < count; i += 2) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + LINE(flags, idx[0], idx[1]); + } + break; + + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_LINE_STRIP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + idx[1] = GET_ELT(0); + idx[2] = idx[1]; + + for (i = 1; i < count; i++, flags = 0) { + idx[0] = idx[1]; + idx[1] = GET_ELT(i); + LINE(flags, idx[0], idx[1]); + } + /* close the loop */ + if (prim == PIPE_PRIM_LINE_LOOP) + LINE(flags, idx[1], idx[2]); + } + break; + + case PIPE_PRIM_TRIANGLES: + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i + 2 < count; i += 3) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (count >= 3) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + idx[1] = GET_ELT(0); + idx[2] = GET_ELT(1); + + if (last_vertex_last) { + for (i = 0; i + 2 < count; i++) { + idx[0] = idx[1]; + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[2] last */ + if (i & 1) + TRIANGLE(flags, idx[1], idx[0], idx[2]); + else + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + else { + for (i = 0; i + 2 < count; i++) { + idx[0] = idx[1]; + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[0] first */ + if (i & 1) + TRIANGLE(flags, idx[0], idx[2], idx[1]); + else + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + idx[0] = GET_ELT(0); + idx[2] = GET_ELT(1); + + /* idx[0] is neither the first nor the last vertex */ + if (last_vertex_last) { + for (i = 0; i + 2 < count; i++) { + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[2] last */ + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + else { + for (i = 0; i + 2 < count; i++) { + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[1] first */ + TRIANGLE(flags, idx[1], idx[2], idx[0]); + } + } + } + break; + + case PIPE_PRIM_QUADS: + if (last_vertex_last) { + for (i = 0; i + 3 < count; i += 4) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_2; + /* always emit idx[3] last */ + TRIANGLE(flags, idx[0], idx[1], idx[3]); + + flags = DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + TRIANGLE(flags, idx[1], idx[2], idx[3]); + } + } + else { + for (i = 0; i + 3 < count; i += 4) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + /* XXX should always emit idx[0] first */ + /* always emit idx[3] first */ + TRIANGLE(flags, idx[3], idx[0], idx[1]); + + flags = DRAW_PIPE_EDGE_FLAG_1 | + DRAW_PIPE_EDGE_FLAG_2; + TRIANGLE(flags, idx[3], idx[1], idx[2]); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + if (count >= 4) { + idx[2] = GET_ELT(0); + idx[3] = GET_ELT(1); + + if (last_vertex_last) { + for (i = 0; i + 3 < count; i += 2) { + idx[0] = idx[2]; + idx[1] = idx[3]; + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + /* always emit idx[3] last */ + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_2; + TRIANGLE(flags, idx[2], idx[0], idx[3]); + + flags = DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + TRIANGLE(flags, idx[0], idx[1], idx[3]); + } + } + else { + for (i = 0; i + 3 < count; i += 2) { + idx[0] = idx[2]; + idx[1] = idx[3]; + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + /* XXX should always emit idx[0] first */ + /* always emit idx[3] first */ + TRIANGLE(flags, idx[3], idx[2], idx[0]); + + flags = DRAW_PIPE_EDGE_FLAG_1 | + DRAW_PIPE_EDGE_FLAG_2; + TRIANGLE(flags, idx[3], idx[0], idx[1]); + } + } + } + break; + + case PIPE_PRIM_POLYGON: + if (count >= 3) { + ushort edge_next, edge_finish; + + if (last_vertex_last) { + flags = (DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_2 | + DRAW_PIPE_EDGE_FLAG_0); + edge_next = DRAW_PIPE_EDGE_FLAG_0; + edge_finish = DRAW_PIPE_EDGE_FLAG_1; + } + else { + flags = (DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1); + edge_next = DRAW_PIPE_EDGE_FLAG_1; + edge_finish = DRAW_PIPE_EDGE_FLAG_2; + } + + idx[0] = GET_ELT(0); + idx[2] = GET_ELT(1); + + for (i = 0; i + 2 < count; i++, flags = edge_next) { + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + + if (i + 3 == count) + flags |= edge_finish; + + /* idx[0] is both the first and the last vertex */ + if (last_vertex_last) + TRIANGLE(flags, idx[1], idx[2], idx[0]); + else + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + break; + + case PIPE_PRIM_LINES_ADJACENCY: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 0; i + 3 < count; i += 4) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]); + } + break; + + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + if (count >= 4) { + flags = DRAW_PIPE_RESET_STIPPLE; + idx[1] = GET_ELT(0); + idx[2] = GET_ELT(1); + idx[3] = GET_ELT(2); + + for (i = 1; i + 2 < count; i++, flags = 0) { + idx[0] = idx[1]; + idx[1] = idx[2]; + idx[2] = idx[3]; + idx[3] = GET_ELT(i + 2); + LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]); + } + } + break; + + case PIPE_PRIM_TRIANGLES_ADJACENCY: + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i + 5 < count; i += 6) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + idx[4] = GET_ELT(i + 4); + idx[5] = GET_ELT(i + 5); + TRIANGLE_ADJ(flags, idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + if (count >= 6) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + idx[0] = GET_ELT(1); + idx[2] = GET_ELT(0); + idx[4] = GET_ELT(2); + idx[3] = GET_ELT(4); + + /* + * The vertices of the i-th triangle are stored in + * idx[0,2,4] = { 2*i, 2*i+2, 2*i+4 }; + * + * The adjacent vertices are stored in + * idx[1,3,5] = { 2*i-2, 2*i+6, 2*i+3 }. + * + * However, there are two exceptions: + * + * For the first triangle, idx[1] = 1; + * For the last triangle, idx[3] = 2*i+5. + */ + if (last_vertex_last) { + for (i = 0; i + 5 < count; i += 2) { + idx[1] = idx[0]; + + idx[0] = idx[2]; + idx[2] = idx[4]; + idx[4] = idx[3]; + + idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5)); + idx[5] = GET_ELT(i + 3); + + /* + * alternate the first two vertices (idx[0] and idx[2]) and the + * corresponding adjacent vertices (idx[3] and idx[5]) to have + * the correct orientation + */ + if (i & 2) { + TRIANGLE_ADJ(flags, + idx[2], idx[1], idx[0], idx[5], idx[4], idx[3]); + } + else { + TRIANGLE_ADJ(flags, + idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); + } + } + } + else { + for (i = 0; i + 5 < count; i += 2) { + idx[1] = idx[0]; + + idx[0] = idx[2]; + idx[2] = idx[4]; + idx[4] = idx[3]; + + idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5)); + idx[5] = GET_ELT(i + 3); + + /* + * alternate the last two vertices (idx[2] and idx[4]) and the + * corresponding adjacent vertices (idx[1] and idx[5]) to have + * the correct orientation + */ + if (i & 2) { + TRIANGLE_ADJ(flags, + idx[0], idx[5], idx[4], idx[3], idx[2], idx[1]); + } + else { + TRIANGLE_ADJ(flags, + idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); + } + } + } + } + break; + + default: + assert(0); + break; + } + + FUNC_EXIT; +} + +#undef LOCAL_VARS +#undef FUNC_ENTER +#undef FUNC_EXIT +#undef LINE_ADJ +#undef TRIANGLE_ADJ + +#undef FUNC +#undef FUNC_VARS +#undef GET_ELT +#undef POINT +#undef LINE +#undef TRIANGLE -- cgit v1.2.3 From 8a41b18b7d731b5db2df5523dbe26143ae171c3a Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 2 Aug 2010 01:21:08 +0800 Subject: draw: Include draw_decompose_tmp.h in draw_pt_vcache_tmp.h. Use draw_decompose_tmp.h to replace vcache primitive decomposer. As the new decomposer supports primitives with adjacency, vcache_triangle_adj and vcache_line_adj (and their variants that have flags) are added. --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 135 ++++++++------- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 209 ++---------------------- 2 files changed, 92 insertions(+), 252 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index d2fa1c6d4e..a848b54f7d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -95,7 +95,7 @@ static INLINE void vcache_check_flush( struct vcache_frontend *vcache ) { if (vcache->draw_count + 6 >= DRAW_MAX || - vcache->fetch_count + 4 >= FETCH_MAX) { + vcache->fetch_count + 6 >= FETCH_MAX) { vcache_flush( vcache ); } } @@ -180,59 +180,61 @@ vcache_point( struct vcache_frontend *vcache, } -static INLINE void -vcache_quad( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) +static INLINE void +vcache_line_adj_flags( struct vcache_frontend *vcache, + unsigned flags, + unsigned a0, unsigned i0, unsigned i1, unsigned a1 ) { - if (vcache->draw->rasterizer->flatshade_first) { - /* pass last quad vertex as first triangle vertex */ - vcache_triangle( vcache, i3, i0, i1 ); - vcache_triangle( vcache, i3, i1, i2 ); - } - else { - /* pass last quad vertex as last triangle vertex */ - vcache_triangle( vcache, i0, i1, i3 ); - vcache_triangle( vcache, i1, i2, i3 ); - } + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_check_flush(vcache); } -static INLINE void -vcache_ef_quad( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) +static INLINE void +vcache_line_adj( struct vcache_frontend *vcache, + unsigned a0, unsigned i0, unsigned i1, unsigned a1 ) { - if (vcache->draw->rasterizer->flatshade_first) { - /* pass last quad vertex as first triangle vertex */ - vcache_triangle_flags( vcache, - ( DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_1 ), - i3, i0, i1 ); - - vcache_triangle_flags( vcache, - ( DRAW_PIPE_EDGE_FLAG_1 | - DRAW_PIPE_EDGE_FLAG_2 ), - i3, i1, i2 ); - } - else { - /* pass last quad vertex as last triangle vertex */ - vcache_triangle_flags( vcache, - ( DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_2 ), - i0, i1, i3 ); - - vcache_triangle_flags( vcache, - ( DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_1 ), - i1, i2, i3 ); - } + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_check_flush(vcache); +} + + +static INLINE void +vcache_triangle_adj_flags( struct vcache_frontend *vcache, + unsigned flags, + unsigned i0, unsigned a0, + unsigned i1, unsigned a1, + unsigned i2, unsigned a2 ) +{ + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_elt(vcache, i2, 0); + vcache_elt(vcache, a2, 0); + vcache_check_flush(vcache); +} + + +static INLINE void +vcache_triangle_adj( struct vcache_frontend *vcache, + unsigned i0, unsigned a0, + unsigned i1, unsigned a1, + unsigned i2, unsigned a2 ) +{ + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_elt(vcache, i2, 0); + vcache_elt(vcache, a2, 0); + vcache_check_flush(vcache); } @@ -240,17 +242,23 @@ vcache_ef_quad( struct vcache_frontend *vcache, * this. The two paths aren't too different though - it may be * possible to reunify them. */ -#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1) -#define POINT(vc,i0) vcache_point(vc,i0) +#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2) +#define LINE(flags,i0,i1) vcache_line_flags(vcache,flags,i0,i1) +#define POINT(i0) vcache_point(vcache,i0) +#define LINE_ADJ(flags,a0,i0,i1,a1) \ + vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1) +#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \ + vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2) #define FUNC vcache_run_extras #include "draw_pt_vcache_tmp.h" -#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1) -#define POINT(vc,i0) vcache_point(vc,i0) +#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2) +#define LINE(flags,i0,i1) vcache_line(vcache,i0,i1) +#define POINT(i0) vcache_point(vcache,i0) +#define LINE_ADJ(flags,a0,i0,i1,a1) \ + vcache_line_adj(vcache,a0,i0,i1,a1) +#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \ + vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2) #define FUNC vcache_run #include "draw_pt_vcache_tmp.h" @@ -540,7 +548,18 @@ vcache_prepare( struct draw_pt_front_end *frontend, * which is a separate issue. */ vcache->input_prim = in_prim; - vcache->output_prim = u_reduced_prim(in_prim); + switch (in_prim) { + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY; + break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY; + break; + default: + vcache->output_prim = u_reduced_prim(in_prim); + } vcache->middle = middle; vcache->opt = opt; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index a42162691b..861ce1adaa 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -1,198 +1,19 @@ +#define FUNC_VARS \ + struct draw_pt_front_end *frontend, \ + pt_elt_func get_elt, \ + const void *elts, \ + int elt_bias, \ + unsigned count +#define LOCAL_VARS \ + struct vcache_frontend *vcache = (struct vcache_frontend *) frontend; \ + struct draw_context *draw = vcache->draw; \ + const unsigned prim = vcache->input_prim; \ + const boolean last_vertex_last = !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first) -static void FUNC( struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - int elt_bias, - unsigned count ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - struct draw_context *draw = vcache->draw; +#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias) - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i; - ushort flags; +#define FUNC_EXIT do { vcache_flush(vcache); } while (0) - if (0) debug_printf("%s %d\n", __FUNCTION__, count); - - - switch (vcache->input_prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - POINT( vcache, - get_elt(elts, i + 0) + elt_bias ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( vcache, - DRAW_PIPE_RESET_STIPPLE, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - flags = DRAW_PIPE_RESET_STIPPLE; - - for (i = 1; i < count; i++, flags = 0) { - LINE( vcache, - flags, - get_elt(elts, i - 1) + elt_bias, - get_elt(elts, i ) + elt_bias); - } - - LINE( vcache, - flags, - get_elt(elts, i - 1) + elt_bias, - get_elt(elts, 0 ) + elt_bias); - } - break; - - case PIPE_PRIM_LINE_STRIP: - flags = DRAW_PIPE_RESET_STIPPLE; - for (i = 1; i < count; i++, flags = 0) { - LINE( vcache, - flags, - get_elt(elts, i - 1) + elt_bias, - get_elt(elts, i ) + elt_bias); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2 ) + elt_bias); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1 + (i&1)) + elt_bias, - get_elt(elts, i + 2 - (i&1)) + elt_bias); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0 + (i&1)) + elt_bias, - get_elt(elts, i + 1 - (i&1)) + elt_bias, - get_elt(elts, i + 2 ) + elt_bias); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, 0 ) + elt_bias); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2 ) + elt_bias); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - for (i = 0; i+3 < count; i += 4) { - QUAD( vcache, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, i + 3) + elt_bias ); - } - break; - - case PIPE_PRIM_QUAD_STRIP: - for (i = 0; i+3 < count; i += 2) { - QUAD( vcache, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 3) + elt_bias ); - } - break; - - case PIPE_PRIM_POLYGON: - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - ushort edge_next, edge_finish; - - if (flatfirst) { - flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_1); - edge_next = DRAW_PIPE_EDGE_FLAG_1; - edge_finish = DRAW_PIPE_EDGE_FLAG_2; - } - else { - flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_2 | - DRAW_PIPE_EDGE_FLAG_0); - edge_next = DRAW_PIPE_EDGE_FLAG_0; - edge_finish = DRAW_PIPE_EDGE_FLAG_1; - } - - for (i = 0; i+2 < count; i++, flags = edge_next) { - - if (i + 3 == count) - flags |= edge_finish; - - if (flatfirst) { - TRIANGLE( vcache, - flags, - get_elt(elts, 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias ); - } - else { - TRIANGLE( vcache, - flags, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, 0) + elt_bias); - } - } - } - break; - - default: - assert(0); - break; - } - - vcache_flush( vcache ); -} - - -#undef TRIANGLE -#undef QUAD -#undef POINT -#undef LINE -#undef FUNC +#include "draw_decompose_tmp.h" -- cgit v1.2.3 From 23176779f88c5cff7365698f391194141e11e64c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 2 Aug 2010 01:38:58 +0800 Subject: draw: Include draw_decompose_tmp.h in draw_gs_tmp.h. Use draw_decompose_tmp.h to replace GS primitive decomposer. --- src/gallium/auxiliary/draw/draw_gs.c | 24 +--- src/gallium/auxiliary/draw/draw_gs_tmp.h | 186 ++++++------------------------- 2 files changed, 39 insertions(+), 171 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index cff859a42b..f2535c5a54 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -373,28 +373,14 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, gs_flush(shader, 1); } -#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,i0,i1,i2) -#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5) -#define LINE(gs,i0,i1) gs_line(gs,i0,i1) -#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3) -#define POINT(gs,i0) gs_point(gs,i0) -#define FUNC gs_run -#define LOCAL_VARS +#define FUNC gs_run +#define GET_ELT(idx) (idx) #include "draw_gs_tmp.h" -#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,elts[i0],elts[i1],elts[i2]) -#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) \ - gs_tri_adj(gs,elts[i0],elts[i1],elts[i2],elts[i3], \ - elts[i4],elts[i5]) -#define LINE(gs,i0,i1) gs_line(gs,elts[i0],elts[i1]) -#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,elts[i0], \ - elts[i1], \ - elts[i2],elts[i3]) -#define POINT(gs,i0) gs_point(gs,elts[i0]) -#define FUNC gs_run_elts -#define LOCAL_VARS \ - const ushort *elts = input_prims->elts; +#define FUNC gs_run_elts +#define LOCAL_VARS const ushort *elts = input_prims->elts; +#define GET_ELT(idx) (elts[idx]) #include "draw_gs_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h index 7a8683cf7c..4a17af0dea 100644 --- a/src/gallium/auxiliary/draw/draw_gs_tmp.h +++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h @@ -1,152 +1,34 @@ - -static void FUNC( struct draw_geometry_shader *shader, - const struct draw_prim_info *input_prims, - const struct draw_vertex_info *input_verts, - struct draw_prim_info *output_prims, - struct draw_vertex_info *output_verts) -{ - struct draw_context *draw = shader->draw; - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i, j; - unsigned count = input_prims->count; - LOCAL_VARS - - if (0) debug_printf("%s %d\n", __FUNCTION__, count); - - debug_assert(input_prims->primitive_count == 1); - - switch (input_prims->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) { - POINT( shader, i + 0 ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( shader , i + 0 , i + 1 ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - - for (i = 1; i < count; i++) { - LINE( shader, i - 1, i ); - } - - LINE( shader, i - 1, 0 ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - LINE( shader, i - 1, i ); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( shader, i + 0, i + 1, i + 2 ); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - i + 0, - i + 1 + (i&1), - i + 2 - (i&1) ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - i + 0 + (i&1), - i + 1 - (i&1), - i + 2 ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - i + 1, - i + 2, - 0 ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - 0, - i + 1, - i + 2 ); - } - } - } - break; - - case PIPE_PRIM_POLYGON: - { - for (i = 0; i+2 < count; i++) { - - if (flatfirst) { - TRIANGLE( shader, 0, i + 1, i + 2 ); - } - else { - TRIANGLE( shader, i + 1, i + 2, 0 ); - } - } - } - break; - - case PIPE_PRIM_LINES_ADJACENCY: - for (i = 0; i+3 < count; i += 4) { - LINE_ADJ( shader , i + 0 , i + 1, i + 2, i + 3 ); - } - break; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - for (i = 1; i + 2 < count; i++) { - LINE_ADJ( shader, i - 1, i, i + 1, i + 2 ); - } - break; - - case PIPE_PRIM_TRIANGLES_ADJACENCY: - for (i = 0; i+5 < count; i += 5) { - TRI_ADJ( shader, i + 0, i + 1, i + 2, - i + 3, i + 4, i + 5); - } - break; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - for (i = 0, j = 0; i+5 < count; i += 2, ++j) { - TRI_ADJ( shader, - i + 0, - i + 1 + 2*(j&1), - i + 2 + 2*(j&1), - i + 3 - 2*(j&1), - i + 4 - 2*(j&1), - i + 5); - } - break; - - default: - debug_assert(!"Unsupported primitive in geometry shader"); - break; - } -} - - -#undef TRIANGLE -#undef TRI_ADJ -#undef POINT -#undef LINE -#undef LINE_ADJ -#undef FUNC -#undef LOCAL_VARS +#define FUNC_VARS struct draw_geometry_shader *gs, \ + const struct draw_prim_info *input_prims, \ + const struct draw_vertex_info *input_verts, \ + struct draw_prim_info *output_prims, \ + struct draw_vertex_info *output_verts + +#define FUNC_ENTER \ + /* declare more local vars */ \ + struct draw_context *draw = gs->draw; \ + const unsigned prim = input_prims->prim; \ + const unsigned count = input_prims->count; \ + const boolean last_vertex_last = \ + !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); \ + do { \ + debug_assert(input_prims->primitive_count == 1); \ + switch (prim) { \ + case PIPE_PRIM_QUADS: \ + case PIPE_PRIM_QUAD_STRIP: \ + case PIPE_PRIM_POLYGON: \ + debug_assert(!"unexpected primitive type in GS"); \ + return; \ + default: \ + break; \ + } \ + } while (0) \ + +#define POINT(i0) gs_point(gs,i0) +#define LINE(flags,i0,i1) gs_line(gs,i0,i1) +#define TRIANGLE(flags,i0,i1,i2) gs_tri(gs,i0,i1,i2) +#define LINE_ADJ(flags,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3) +#define TRIANGLE_ADJ(flags,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5) + +#include "draw_decompose_tmp.h" -- cgit v1.2.3 From 94d256591d83ac1330f7237e865784618d124d09 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 2 Aug 2010 01:38:58 +0800 Subject: draw: Include draw_decompose_tmp.h in draw_so_emit_tmp.h. Use draw_decompose_tmp.h to replace stream out primitive decomposer. --- src/gallium/auxiliary/draw/draw_pt_so_emit.c | 20 +--- src/gallium/auxiliary/draw/draw_so_emit_tmp.h | 156 ++++++-------------------- 2 files changed, 38 insertions(+), 138 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index 5d82934889..c86bdd99a3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -218,25 +218,15 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2) } -#define TRIANGLE(gs,i0,i1,i2) so_tri(so,i0,i1,i2) -#define LINE(gs,i0,i1) so_line(so,i0,i1) -#define POINT(gs,i0) so_point(so,i0) -#define FUNC so_run_linear -#define LOCAL_VARS +#define FUNC so_run_linear +#define GET_ELT(idx) (start + (idx)) #include "draw_so_emit_tmp.h" -#undef LOCAL_VARS -#undef FUNC -#define TRIANGLE(gs,i0,i1,i2) so_tri(gs,elts[i0],elts[i1],elts[i2]) -#define LINE(gs,i0,i1) so_line(gs,elts[i0],elts[i1]) -#define POINT(gs,i0) so_point(gs,elts[i0]) -#define FUNC so_run_elts -#define LOCAL_VARS \ - const ushort *elts = input_prims->elts; +#define FUNC so_run_elts +#define LOCAL_VARS const ushort *elts = input_prims->elts; +#define GET_ELT(idx) (elts[start + (idx)]) #include "draw_so_emit_tmp.h" -#undef LOCAL_VARS -#undef FUNC void draw_pt_so_emit( struct pt_so_emit *emit, diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h index 01212a8e53..6d8937a0b4 100644 --- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h +++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h @@ -1,123 +1,33 @@ - -static void FUNC( struct pt_so_emit *so, - const struct draw_prim_info *input_prims, - const struct draw_vertex_info *input_verts, - unsigned start, - unsigned count) -{ - struct draw_context *draw = so->draw; - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i; - LOCAL_VARS - - if (0) debug_printf("%s %d\n", __FUNCTION__, count); - - debug_assert(input_prims->primitive_count == 1); - - switch (input_prims->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) { - POINT( so, start + i + 0 ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( so , start + i + 0 , start + i + 1 ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - - for (i = 1; i < count; i++) { - LINE( so, start + i - 1, start + i ); - } - - LINE( so, start + i - 1, start ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - LINE( so, start + i - 1, start + i ); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( so, start + i + 0, start + i + 1, start + i + 2 ); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start + i + 0, - start + i + 1 + (i&1), - start + i + 2 - (i&1) ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start + i + 0 + (i&1), - start + i + 1 - (i&1), - start + i + 2 ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start + i + 1, - start + i + 2, - start ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start, - start + i + 1, - start + i + 2 ); - } - } - } - break; - - case PIPE_PRIM_POLYGON: - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - - for (i = 0; i+2 < count; i++) { - - if (flatfirst) { - TRIANGLE( so, start + 0, start + i + 1, start + i + 2 ); - } - else { - TRIANGLE( so, start + i + 1, start + i + 2, start + 0 ); - } - } - } - break; - - default: - debug_assert(!"Unsupported primitive in stream output"); - break; - } -} - - -#undef TRIANGLE -#undef POINT -#undef LINE -#undef FUNC +#define FUNC_VARS \ + struct pt_so_emit *so, \ + const struct draw_prim_info *input_prims, \ + const struct draw_vertex_info *input_verts, \ + unsigned start, \ + unsigned count + +#define FUNC_ENTER \ + /* declare more local vars */ \ + struct draw_context *draw = so->draw; \ + const unsigned prim = input_prims->prim; \ + const boolean last_vertex_last = \ + !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); \ + do { \ + debug_assert(input_prims->primitive_count == 1); \ + switch (prim) { \ + case PIPE_PRIM_LINES_ADJACENCY: \ + case PIPE_PRIM_LINE_STRIP_ADJACENCY: \ + case PIPE_PRIM_TRIANGLES_ADJACENCY: \ + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: \ + debug_assert(!"unexpected primitive type in stream output"); \ + return; \ + default: \ + break; \ + } \ + } while (0) \ + +#define POINT(i0) so_point(so,i0) +#define LINE(flags,i0,i1) so_line(so,i0,i1) +#define TRIANGLE(flags,i0,i1,i2) so_tri(so,i0,i1,i2) + +#include "draw_decompose_tmp.h" -- cgit v1.2.3 From eb3c6ddafb7a1b544243e9dec991cc24d16940ea Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 2 Aug 2010 01:38:58 +0800 Subject: draw: Include draw_decompose_tmp.h in draw_pt_decompose.h. Use draw_decompose_tmp.h to replace pipeline primitive decomposer. --- src/gallium/auxiliary/draw/draw_pipe.c | 133 ++++------------- src/gallium/auxiliary/draw/draw_pt_decompose.h | 199 +------------------------ 2 files changed, 36 insertions(+), 296 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 144f10a5d0..3421b826d9 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -169,77 +169,41 @@ static void do_triangle( struct draw_context *draw, /* * Set up macros for draw_pt_decompose.h template code. * This code uses vertex indexes / elements. + * + * Flags are needed by the stipple and unfilled stages. When the two stages + * are active, vcache_run_extras is called and the flags are stored in the + * higher bits of i0. Otherwise, flags do not matter. */ -/* emit first quad vertex as first vertex in triangles */ -#define QUAD_FIRST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_1 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) - -/* emit last quad vertex as last vertex in triangles */ -#define QUAD_LAST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) - -#define TRIANGLE(flags,i0,i1,i2) \ - do_triangle( draw, \ - elts[i0], /* flags */ \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK) ); - -#define LINE(flags,i0,i1) \ - do_line( draw, \ - elts[i0], \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK) ); +#define TRIANGLE(flags,i0,i1,i2) \ + do_triangle( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i2 & ~DRAW_PIPE_FLAG_MASK) ) + +#define LINE(flags,i0,i1) \ + do_line( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1 & ~DRAW_PIPE_FLAG_MASK) ) #define POINT(i0) \ do_point( draw, \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK) ) + +#define GET_ELT(idx) (elts[idx]) #define FUNC pipe_run_elts -#define ARGS \ +#define FUNC_VARS \ struct draw_context *draw, \ unsigned prim, \ struct vertex_header *vertices, \ unsigned stride, \ - const ushort *elts - -#define LOCAL_VARS \ - char *verts = (char *)vertices; \ - boolean flatfirst = (draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); \ - unsigned i; \ - ushort flags - -#define FLUSH + const ushort *elts, \ + unsigned count #include "draw_pt_decompose.h" -#undef ARGS -#undef LOCAL_VARS @@ -304,38 +268,6 @@ void draw_pipeline_run( struct draw_context *draw, * This code is for non-indexed (aka linear) rendering (no elts). */ -/* emit first quad vertex as first vertex in triangles */ -#define QUAD_FIRST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_1 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) - -/* emit last quad vertex as last vertex in triangles */ -#define QUAD_LAST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) - #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ flags, /* flags */ \ @@ -353,21 +285,16 @@ void draw_pipeline_run( struct draw_context *draw, do_point( draw, \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) -#define FUNC pipe_run_linear -#define ARGS \ - struct draw_context *draw, \ - unsigned prim, \ - struct vertex_header *vertices, \ - unsigned stride -#define LOCAL_VARS \ - char *verts = (char *)vertices; \ - boolean flatfirst = (draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); \ - unsigned i; \ - ushort flags +#define GET_ELT(idx) (idx) -#define FLUSH +#define FUNC pipe_run_linear +#define FUNC_VARS \ + struct draw_context *draw, \ + unsigned prim, \ + struct vertex_header *vertices, \ + unsigned stride, \ + unsigned count #include "draw_pt_decompose.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h index 52f9593d46..e7ae9c4449 100644 --- a/src/gallium/auxiliary/draw/draw_pt_decompose.h +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -1,194 +1,7 @@ +#define LOCAL_VARS \ + char *verts = (char *) vertices; \ + const boolean last_vertex_last = \ + !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first) - -static void FUNC( ARGS, - unsigned count ) -{ - LOCAL_VARS; - - switch (prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - POINT( (i + 0) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( DRAW_PIPE_RESET_STIPPLE, - (i + 0), - (i + 1)); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - flags = DRAW_PIPE_RESET_STIPPLE; - - for (i = 1; i < count; i++, flags = 0) { - LINE( flags, - (i - 1), - (i )); - } - - LINE( flags, - (i - 1), - (0 )); - } - break; - - case PIPE_PRIM_LINE_STRIP: - flags = DRAW_PIPE_RESET_STIPPLE; - for (i = 1; i < count; i++, flags = 0) { - LINE( flags, - (i - 1), - (i )); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 0), - (i + 1), - (i + 2 )); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - /* Emit first triangle vertex as first triangle vertex */ - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 0), - (i + 1 + (i&1)), - (i + 2 - (i&1)) ); - } - } - else { - for (i = 0; i+2 < count; i++) { - /* Emit last triangle vertex as last triangle vertex */ - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 0 + (i&1)), - (i + 1 - (i&1)), - (i + 2 )); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 1), - (i + 2), - 0 ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (0), - (i + 1), - (i + 2 )); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - /* GL quads don't follow provoking vertex convention */ - if (flatfirst) { - for (i = 0; i+3 < count; i += 4) { - /* emit last quad vertex as first triangle vertex */ - QUAD_FIRST_PV( (i + 3), - (i + 0), - (i + 1), - (i + 2) ); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - /* emit last quad vertex as last triangle vertex */ - QUAD_LAST_PV( (i + 0), - (i + 1), - (i + 2), - (i + 3) ); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - /* GL quad strips don't follow provoking vertex convention */ - if (flatfirst) { - for (i = 0; i+3 < count; i += 2) { - /* emit last quad vertex as first triangle vertex */ - QUAD_FIRST_PV( (i + 3), - (i + 2), - (i + 0), - (i + 1) ); - - } - } - else { - for (i = 0; i+3 < count; i += 2) { - /* emit last quad vertex as last triangle vertex */ - QUAD_LAST_PV( (i + 2), - (i + 0), - (i + 1), - (i + 3) ); - } - } - break; - - case PIPE_PRIM_POLYGON: - /* GL polygons don't follow provoking vertex convention */ - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; - const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; - - flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; - - for (i = 0; i+2 < count; i++, flags = edge_middle) { - - if (i + 3 == count) - flags |= edge_last; - - if (flatfirst) { - /* emit first polygon vertex as first triangle vertex */ - TRIANGLE( flags, - (0), - (i + 1), - (i + 2) ); - } - else { - /* emit first polygon vertex as last triangle vertex */ - TRIANGLE( flags, - (i + 1), - (i + 2), - (0)); - } - } - } - break; - - default: - assert(0); - break; - } - - FLUSH; -} - - -#undef TRIANGLE -#undef QUAD_FIRST_PV -#undef QUAD_LAST_PV -#undef POINT -#undef LINE -#undef FUNC +#include "draw_decompose_tmp.h" -- cgit v1.2.3 From f1fc444bb05421404f42e8d08e9085c6fb6ce7a9 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 4 Aug 2010 14:39:16 +0800 Subject: draw: Mask out vertex flags in GS and stream output. This fixes out-of-bound access to the vertices. --- src/gallium/auxiliary/draw/draw_gs.c | 2 +- src/gallium/auxiliary/draw/draw_pt_so_emit.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index f2535c5a54..4a1013e79a 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -380,7 +380,7 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, #define FUNC gs_run_elts #define LOCAL_VARS const ushort *elts = input_prims->elts; -#define GET_ELT(idx) (elts[idx]) +#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK) #include "draw_gs_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index c86bdd99a3..f7f4f24d35 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -225,7 +225,7 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2) #define FUNC so_run_elts #define LOCAL_VARS const ushort *elts = input_prims->elts; -#define GET_ELT(idx) (elts[start + (idx)]) +#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK) #include "draw_so_emit_tmp.h" -- cgit v1.2.3 From 642d5ba79abc6a231a5fdabb3454b9b082b0d7f8 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 4 Aug 2010 14:37:40 +0800 Subject: draw: Remove unnecessary vertex flag ANDs. Vertex flags are a contract between vcache and the pipeline. They are set only for the first vertex of a primitive. --- src/gallium/auxiliary/draw/draw_pipe.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 3421b826d9..070ac803c8 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -179,18 +179,17 @@ static void do_triangle( struct draw_context *draw, do_triangle( draw, \ i0, /* flags */ \ verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2 & ~DRAW_PIPE_FLAG_MASK) ) + verts + stride * (i1), \ + verts + stride * (i2) ) #define LINE(flags,i0,i1) \ do_line( draw, \ i0, /* flags */ \ verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1 & ~DRAW_PIPE_FLAG_MASK) ) + verts + stride * (i1) ) #define POINT(i0) \ - do_point( draw, \ - verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK) ) + do_point( draw, verts + stride * (i0) ) #define GET_ELT(idx) (elts[idx]) @@ -268,22 +267,19 @@ void draw_pipeline_run( struct draw_context *draw, * This code is for non-indexed (aka linear) rendering (no elts). */ -#define TRIANGLE(flags,i0,i1,i2) \ - do_triangle( draw, \ - flags, /* flags */ \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)) +#define TRIANGLE(flags,i0,i1,i2) \ + do_triangle( draw, flags, \ + verts + stride * (i0), \ + verts + stride * (i1), \ + verts + stride * (i2) ) -#define LINE(flags,i0,i1) \ - do_line( draw, \ - flags, \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK)) +#define LINE(flags,i0,i1) \ + do_line( draw, flags, \ + verts + stride * (i0), \ + verts + stride * (i1) ) -#define POINT(i0) \ - do_point( draw, \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) +#define POINT(i0) \ + do_point( draw, verts + stride * (i0) ) #define GET_ELT(idx) (idx) -- cgit v1.2.3 From d38afcd2f286e924e0f9b7f484712ac19e3f98fc Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 6 Aug 2010 16:57:04 +0800 Subject: draw: Avoid mixed declarations and code. Do not expand LOCAL_VARS to void expression. Otherwise, declarations and code will be mixed when more variables are declared in FUNC_ENTER. This fixes fdo bug #29416. --- src/gallium/auxiliary/draw/draw_decompose_tmp.h | 4 ++-- src/gallium/auxiliary/draw/draw_pt_decompose.h | 2 +- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h index cb25202323..a52d2b5058 100644 --- a/src/gallium/auxiliary/draw/draw_decompose_tmp.h +++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h @@ -30,7 +30,7 @@ /* these macros are optional */ #ifndef LOCAL_VARS -#define LOCAL_VARS do {} while (0) +#define LOCAL_VARS #endif #ifndef FUNC_ENTER #define FUNC_ENTER do {} while (0) @@ -50,7 +50,7 @@ FUNC(FUNC_VARS) { unsigned idx[6], i; ushort flags; - LOCAL_VARS; + LOCAL_VARS FUNC_ENTER; diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h index e7ae9c4449..3127aad731 100644 --- a/src/gallium/auxiliary/draw/draw_pt_decompose.h +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -2,6 +2,6 @@ char *verts = (char *) vertices; \ const boolean last_vertex_last = \ !(draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first) + draw->rasterizer->flatshade_first); #include "draw_decompose_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index 861ce1adaa..1a3748d5f0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -10,7 +10,7 @@ struct draw_context *draw = vcache->draw; \ const unsigned prim = vcache->input_prim; \ const boolean last_vertex_last = !(draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first) + draw->rasterizer->flatshade_first); #define GET_ELT(idx) (get_elt(elts, idx) + elt_bias) -- cgit v1.2.3 From 6ae39f6dca8f0968902642f04f1deb6f573edb6d Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 7 Aug 2010 02:14:50 -0600 Subject: draw: Assert that only the first vetex may have flags set. 642d5ba79abc6a231a5fdabb3454b9b082b0d7f8 removed flags masking for vertices other than the first one. Add assertions to be on the safe side. --- src/gallium/auxiliary/draw/draw_pipe.c | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 070ac803c8..58995e0724 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -175,21 +175,31 @@ static void do_triangle( struct draw_context *draw, * higher bits of i0. Otherwise, flags do not matter. */ -#define TRIANGLE(flags,i0,i1,i2) \ - do_triangle( draw, \ - i0, /* flags */ \ - verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2) ) - -#define LINE(flags,i0,i1) \ - do_line( draw, \ - i0, /* flags */ \ - verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1) ) +#define TRIANGLE(flags,i0,i1,i2) \ + do { \ + assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \ + assert(!((i2) & DRAW_PIPE_FLAG_MASK)); \ + do_triangle( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1), \ + verts + stride * (i2) ); \ + } while (0) + +#define LINE(flags,i0,i1) \ + do { \ + assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \ + do_line( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1) ); \ + } while (0) #define POINT(i0) \ - do_point( draw, verts + stride * (i0) ) + do { \ + assert(!((i0) & DRAW_PIPE_FLAG_MASK)); \ + do_point( draw, verts + stride * (i0) ); \ + } while (0) #define GET_ELT(idx) (elts[idx]) -- cgit v1.2.3 From 600cd858d446bc1698a9b28f714f3fd6145316fb Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 6 Aug 2010 22:50:09 +0800 Subject: draw: Fix draw_pt_split_prim for primitives with adjacency. Some primitives with adjacency have their "incr" wrong. --- src/gallium/auxiliary/draw/draw_pt_util.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index 3236d38e6a..182a597cca 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -53,7 +53,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; case PIPE_PRIM_LINES_ADJACENCY: *first = 4; - *incr = 2; + *incr = 4; break; case PIPE_PRIM_LINE_STRIP_ADJACENCY: *first = 4; @@ -65,7 +65,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; case PIPE_PRIM_TRIANGLES_ADJACENCY: *first = 6; - *incr = 3; + *incr = 6; break; case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_TRIANGLE_FAN: @@ -75,7 +75,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: *first = 6; - *incr = 1; + *incr = 2; break; case PIPE_PRIM_QUADS: *first = 4; -- cgit v1.2.3 From e34c52da87990b1ec64602a20418cdb274deefa9 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 7 Aug 2010 00:50:32 +0800 Subject: draw: Add an assertion to varray's version of trim(). Assert that "first" is always smaller than "count" and add reasoning. It would be better to simply fix trim(), but it is used in tight loops right now. --- src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index a292346be9..55e43b2a71 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -1,6 +1,11 @@ static unsigned trim( unsigned count, unsigned first, unsigned incr ) { - return count - (count - first) % incr; + /* + * count either has been trimmed in draw_pt_arrays or is set to + * (driver)_fetch_max which is hopefully always larger than first. + */ + assert(count >= first); + return count - (count - first) % incr; } static void FUNC(struct draw_pt_front_end *frontend, -- cgit v1.2.3 From 14e9fbee1cef281c6849a5f2a6d2cc66bfd4b3fd Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Aug 2010 15:09:41 -0600 Subject: gallium: remove stray semicolons --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +- src/gallium/auxiliary/util/u_cpu_detect.c | 2 +- src/gallium/drivers/llvmpipe/lp_context.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index ef0888079c..60d8bcfa55 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -46,7 +46,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = { DEBUG_NAMED_VALUE_END }; -DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0); +DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0) #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 9e02040f6c..287ee006cf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -34,7 +34,7 @@ #include "tgsi_iterate.h" -DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE); +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE) typedef struct { diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 6f38d22285..b1a8c75b99 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -73,7 +73,7 @@ #endif -DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE); +DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE) struct util_cpu_caps util_cpu_caps; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 28793682ed..7543bd7b2b 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -47,7 +47,7 @@ #include "lp_setup.h" -DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE) static void llvmpipe_destroy( struct pipe_context *pipe ) -- cgit v1.2.3 From 445e59057f69131fc0f1585f22c5f281d1d0f4a3 Mon Sep 17 00:00:00 2001 From: nobled Date: Fri, 6 Aug 2010 17:36:41 +0000 Subject: draw: Use the correct type for integers Two integers were being operated on as a vector of floats in draw_llvm_generate(). This bug got uncovered by fixing this bug: http://bugs.freedesktop.org/29407 --- src/gallium/auxiliary/draw/draw_llvm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index de99b00a81..993e1101d7 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -683,7 +683,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; - struct lp_type vs_type = lp_type_float_vec(32); const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; void *code; @@ -732,7 +731,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld, builder, lp_type_int(32)); end = lp_build_add(&bld, start, count); @@ -847,7 +846,6 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian struct lp_build_context bld; struct lp_build_context bld_int; struct lp_build_loop_state lp_loop; - struct lp_type vs_type = lp_type_float_vec(32); const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef fetch_max; @@ -899,7 +897,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld, builder, lp_type_float_vec(32)); lp_build_context_init(&bld_int, builder, lp_type_int(32)); step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); -- cgit v1.2.3 From b9a21fd6ca036763500e72ce5783867fdfb340e5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 8 Aug 2010 13:58:23 +0100 Subject: draw: Remove unused variable. --- src/gallium/auxiliary/draw/draw_llvm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 993e1101d7..8d53601d19 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -844,7 +844,6 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian struct draw_context *draw = llvm->draw; unsigned i, j; struct lp_build_context bld; - struct lp_build_context bld_int; struct lp_build_loop_state lp_loop; const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; @@ -897,8 +896,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, lp_type_float_vec(32)); - lp_build_context_init(&bld_int, builder, lp_type_int(32)); + lp_build_context_init(&bld, builder, lp_type_int(32)); step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); @@ -933,7 +931,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian /* make sure we're not out of bounds which can happen * if fetch_count % 4 != 0, because on the last iteration * a few of the 4 vertex fetches will be out of bounds */ - true_index = lp_build_min(&bld_int, true_index, fetch_max); + true_index = lp_build_min(&bld, true_index, fetch_max); fetch_ptr = LLVMBuildGEP(builder, fetch_elts, &true_index, 1, ""); -- cgit v1.2.3 From d8279728165eec2da6031cf543820acad322d192 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 8 Aug 2010 21:02:59 +0100 Subject: gallivm: Add type checks for the basic operations. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index f5f2623e46..98e8e4916d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -190,6 +190,9 @@ lp_build_add(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == bld->zero) return b; if(b == bld->zero) @@ -273,6 +276,9 @@ lp_build_sub(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(b == bld->zero) return a; if(a == bld->undef || b == bld->undef) @@ -395,6 +401,9 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef shift; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == bld->zero) return bld->zero; if(a == bld->one) @@ -518,6 +527,9 @@ lp_build_div(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == bld->zero) return bld->zero; if(a == bld->one) -- cgit v1.2.3 From cd5af8c703d84dd856528554fa615e9787ebe75f Mon Sep 17 00:00:00 2001 From: nobled Date: Sun, 8 Aug 2010 20:17:30 +0000 Subject: gallivm: Use the correct context for integers See: http://bugs.freedesktop.org/29407 --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 21236839fb..048b29929a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -533,7 +533,7 @@ emit_fetch( reg->Register.Index * 4 + swizzle); /* index_vec = index_vec + addr_vec */ - index_vec = lp_build_add(&bld->base, index_vec, addr_vec); + index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec); /* Gather values from the constant buffer */ res = build_gather(bld, bld->consts_ptr, index_vec); -- cgit v1.2.3 From 12f5c0f9ce497e99854e0a3a7f5ff297a2a0a1e3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 8 Aug 2010 22:18:53 +0100 Subject: gallivm: Fix more integer operations. --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 048b29929a..42d796cb95 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -489,7 +489,7 @@ get_indirect_offsets(struct lp_build_tgsi_soa_context *bld, int_vec_type, ""); /* addr_vec = addr_vec * 4 */ - addr_vec = lp_build_mul(&bld->base, addr_vec, vec4); + addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4); return addr_vec; } @@ -773,7 +773,9 @@ emit_store( addr = LLVMBuildExtractElement(bld->base.builder, addr, LLVMConstInt(LLVMInt32Type(), 0, 0), ""); - addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + addr = LLVMBuildMul(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 4, 0), + ""); } switch( reg->Register.File ) { -- cgit v1.2.3 From 65b9747a54490dd56cd5cee4c2c1b9f51d35f133 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Aug 2010 16:41:44 +0100 Subject: util: Move _mm_shuffle_epi8() to u_sse.h. It's bound to be useful elsewhere. --- src/gallium/auxiliary/util/u_sse.h | 29 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_tile_soa.py | 29 +---------------------------- 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 6145e34aa3..87959ab0aa 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -71,6 +71,35 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ + +#if defined(PIPE_ARCH_SSSE3) + +#include + +#else /* !PIPE_ARCH_SSSE3 */ + +#include + +/** + * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases + * where -mssse3 is not supported/enabled. + * + * MSVC will never get in here as its intrinsics support do not rely on + * compiler command line options. + */ +static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_epi8(__m128i a, __m128i mask) +{ + __m128i result; + __asm__("pshufb %1, %0" + : "=x" (result) + : "xm" (mask), "0" (a)); + return result; +} + +#endif /* !PIPE_ARCH_SSSE3 */ + + #endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index c71ec8066c..2ba39052ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -293,34 +293,7 @@ def generate_ssse3(): print ''' #if defined(PIPE_ARCH_SSE) - -#if defined(PIPE_ARCH_SSSE3) - -#include - -#else - -#include - -/** - * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases - * where -mssse3 is not supported/enabled. - * - * MSVC will never get in here as its intrinsics support do not rely on - * compiler command line options. - */ -static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_shuffle_epi8(__m128i a, __m128i mask) -{ - __m128i result; - __asm__("pshufb %1, %0" - : "=x" (result) - : "xm" (mask), "0" (a)); - return result; -} - -#endif - +#include "util/u_sse.h" static void lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, -- cgit v1.2.3 From fc9a49b638c26801951c33a570178bbb2b67ec60 Mon Sep 17 00:00:00 2001 From: nobled Date: Sun, 8 Aug 2010 19:44:54 +0000 Subject: gallivm: Always use floating-point operators for floating-point types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the assert added in LLVM 2.8: assert(getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!") But it also fixes some subtle bugs, since we should've been doing this since LLVM 2.6 anyway. Includes a modified patch from steckdenis@yahoo.fr for the FNeg instructions in emit_fetch(); thanks for pointing those out. http://bugs.freedesktop.org/29404 http://bugs.freedesktop.org/29407 Signed-off-by: José Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 181 +++++++++++++--------- src/gallium/auxiliary/gallivm/lp_bld_conv.c | 14 +- src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 4 +- src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 4 +- src/gallium/auxiliary/gallivm/lp_bld_quad.c | 4 +- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 12 +- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 10 +- 7 files changed, 137 insertions(+), 92 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 98e8e4916d..d2dde41e9f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -173,9 +173,15 @@ lp_build_comp(struct lp_build_context *bld, } if(LLVMIsConstant(a)) - return LLVMConstSub(bld->one, a); + if (type.floating) + return LLVMConstFSub(bld->one, a); + else + return LLVMConstSub(bld->one, a); else - return LLVMBuildSub(bld->builder, bld->one, a, ""); + if (type.floating) + return LLVMBuildFSub(bld->builder, bld->one, a, ""); + else + return LLVMBuildSub(bld->builder, bld->one, a, ""); } @@ -220,9 +226,15 @@ lp_build_add(struct lp_build_context *bld, } if(LLVMIsConstant(a) && LLVMIsConstant(b)) - res = LLVMConstAdd(a, b); + if (type.floating) + res = LLVMConstFAdd(a, b); + else + res = LLVMConstAdd(a, b); else - res = LLVMBuildAdd(bld->builder, a, b, ""); + if (type.floating) + res = LLVMBuildFAdd(bld->builder, a, b, ""); + else + res = LLVMBuildAdd(bld->builder, a, b, ""); /* clamp to ceiling of 1.0 */ if(bld->type.norm && (bld->type.floating || bld->type.fixed)) @@ -256,9 +268,16 @@ lp_build_sum_vector(struct lp_build_context *bld, for (i = 1; i < type.length; i++) { index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildAdd(bld->builder, res, - LLVMBuildExtractElement(bld->builder, a, index, ""), - ""); + if (type.floating) + res = LLVMBuildFAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, + a, index, ""), + ""); + else + res = LLVMBuildAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, + a, index, ""), + ""); } return res; @@ -306,9 +325,15 @@ lp_build_sub(struct lp_build_context *bld, } if(LLVMIsConstant(a) && LLVMIsConstant(b)) - res = LLVMConstSub(a, b); + if (type.floating) + res = LLVMConstFSub(a, b); + else + res = LLVMConstSub(a, b); else - res = LLVMBuildSub(bld->builder, a, b, ""); + if (type.floating) + res = LLVMBuildFSub(bld->builder, a, b, ""); + else + res = LLVMBuildSub(bld->builder, a, b, ""); if(bld->type.norm && (bld->type.floating || bld->type.fixed)) res = lp_build_max_simple(bld, res, bld->zero); @@ -442,7 +467,10 @@ lp_build_mul(struct lp_build_context *bld, shift = NULL; if(LLVMIsConstant(a) && LLVMIsConstant(b)) { - res = LLVMConstMul(a, b); + if (type.floating) + res = LLVMConstFMul(a, b); + else + res = LLVMConstMul(a, b); if(shift) { if(type.sign) res = LLVMConstAShr(res, shift); @@ -451,7 +479,10 @@ lp_build_mul(struct lp_build_context *bld, } } else { - res = LLVMBuildMul(bld->builder, a, b, ""); + if (type.floating) + res = LLVMBuildFMul(bld->builder, a, b, ""); + else + res = LLVMBuildMul(bld->builder, a, b, ""); if(shift) { if(type.sign) res = LLVMBuildAShr(bld->builder, res, shift, ""); @@ -481,7 +512,10 @@ lp_build_mul_imm(struct lp_build_context *bld, return a; if(b == -1) - return LLVMBuildNeg(bld->builder, a, ""); + if (bld->type.floating) + return LLVMBuildFNeg(bld->builder, a, ""); + else + return LLVMBuildNeg(bld->builder, a, ""); if(b == 2 && bld->type.floating) return lp_build_add(bld, a, a); @@ -714,7 +748,12 @@ LLVMValueRef lp_build_negate(struct lp_build_context *bld, LLVMValueRef a) { - return LLVMBuildNeg(bld->builder, a, ""); + if (bld->type.floating) + a = LLVMBuildFNeg(bld->builder, a, ""); + else + a = LLVMBuildNeg(bld->builder, a, ""); + + return a; } @@ -1033,7 +1072,7 @@ lp_build_iround(struct lp_build_context *bld, half = LLVMBuildOr(bld->builder, sign, half, ""); half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); - res = LLVMBuildAdd(bld->builder, a, half, ""); + res = LLVMBuildFAdd(bld->builder, a, half, ""); } res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); @@ -1082,7 +1121,7 @@ lp_build_ifloor(struct lp_build_context *bld, offset = LLVMBuildAnd(bld->builder, offset, sign, ""); offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); - res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res"); + res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res"); } /* round to nearest (toward zero) */ @@ -1132,7 +1171,7 @@ lp_build_iceil(struct lp_build_context *bld, offset = LLVMBuildAnd(bld->builder, offset, sign, ""); offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); - res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res"); + res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res"); } /* round to nearest (toward zero) */ @@ -1197,9 +1236,9 @@ lp_build_rcp(struct lp_build_context *bld, rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); - res = LLVMBuildMul(bld->builder, a, rcp_a, ""); - res = LLVMBuildSub(bld->builder, two, res, ""); - res = LLVMBuildMul(bld->builder, res, rcp_a, ""); + res = LLVMBuildFMul(bld->builder, a, rcp_a, ""); + res = LLVMBuildFSub(bld->builder, two, res, ""); + res = LLVMBuildFMul(bld->builder, res, rcp_a, ""); return rcp_a; #else @@ -1282,7 +1321,7 @@ lp_build_sin(struct lp_build_context *bld, */ LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516); - LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y"); + LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y"); /* * store the integer part of y in mm0 @@ -1356,9 +1395,9 @@ lp_build_sin(struct lp_build_context *bld, * xmm2 = _mm_mul_ps(y, xmm2); * xmm3 = _mm_mul_ps(y, xmm3); */ - LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1"); - LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2"); - LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3"); + LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1"); + LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2"); + LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3"); /* * x = _mm_add_ps(x, xmm1); @@ -1366,16 +1405,16 @@ lp_build_sin(struct lp_build_context *bld, * x = _mm_add_ps(x, xmm3); */ - LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1"); - LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2"); - LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3"); + LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1"); + LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2"); + LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3"); /* * Evaluate the first polynom (0 <= x <= Pi/4) * * z = _mm_mul_ps(x,x); */ - LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z"); + LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z"); /* * _PS_CONST(coscof_p0, 2.443315711809948E-005); @@ -1390,12 +1429,12 @@ lp_build_sin(struct lp_build_context *bld, * y = *(v4sf*)_ps_coscof_p0; * y = _mm_mul_ps(y, z); */ - LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3"); - LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4"); - LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5"); - LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6"); - LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7"); - LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8"); + LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3"); + LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4"); + LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5"); + LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6"); + LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7"); + LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8"); /* @@ -1404,10 +1443,10 @@ lp_build_sin(struct lp_build_context *bld, * y = _mm_add_ps(y, *(v4sf*)_ps_1); */ LLVMValueRef half = lp_build_const_v4sf(0.5); - LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp"); - LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8"); + LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp"); + LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8"); LLVMValueRef one = lp_build_const_v4sf(1.0); - LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9"); + LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9"); /* * _PS_CONST(sincof_p0, -1.9515295891E-4); @@ -1431,13 +1470,13 @@ lp_build_sin(struct lp_build_context *bld, * y2 = _mm_add_ps(y2, x); */ - LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3"); - LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4"); - LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5"); - LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6"); - LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7"); - LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8"); - LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9"); + LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3"); + LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4"); + LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5"); + LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6"); + LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7"); + LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8"); + LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9"); /* * select the correct result from the two polynoms @@ -1493,7 +1532,7 @@ lp_build_cos(struct lp_build_context *bld, */ LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516); - LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y"); + LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y"); /* * store the integer part of y in mm0 @@ -1573,9 +1612,9 @@ lp_build_cos(struct lp_build_context *bld, * xmm2 = _mm_mul_ps(y, xmm2); * xmm3 = _mm_mul_ps(y, xmm3); */ - LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1"); - LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2"); - LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3"); + LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1"); + LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2"); + LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3"); /* * x = _mm_add_ps(x, xmm1); @@ -1583,16 +1622,16 @@ lp_build_cos(struct lp_build_context *bld, * x = _mm_add_ps(x, xmm3); */ - LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1"); - LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2"); - LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3"); + LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1"); + LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2"); + LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3"); /* * Evaluate the first polynom (0 <= x <= Pi/4) * * z = _mm_mul_ps(x,x); */ - LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z"); + LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z"); /* * _PS_CONST(coscof_p0, 2.443315711809948E-005); @@ -1607,12 +1646,12 @@ lp_build_cos(struct lp_build_context *bld, * y = *(v4sf*)_ps_coscof_p0; * y = _mm_mul_ps(y, z); */ - LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3"); - LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4"); - LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5"); - LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6"); - LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7"); - LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8"); + LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3"); + LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4"); + LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5"); + LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6"); + LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7"); + LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8"); /* @@ -1621,10 +1660,10 @@ lp_build_cos(struct lp_build_context *bld, * y = _mm_add_ps(y, *(v4sf*)_ps_1); */ LLVMValueRef half = lp_build_const_v4sf(0.5); - LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp"); - LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8"); + LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp"); + LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8"); LLVMValueRef one = lp_build_const_v4sf(1.0); - LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9"); + LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9"); /* * _PS_CONST(sincof_p0, -1.9515295891E-4); @@ -1648,13 +1687,13 @@ lp_build_cos(struct lp_build_context *bld, * y2 = _mm_add_ps(y2, x); */ - LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3"); - LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4"); - LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5"); - LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6"); - LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7"); - LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8"); - LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9"); + LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3"); + LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4"); + LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5"); + LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6"); + LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7"); + LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8"); + LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9"); /* * select the correct result from the two polynoms @@ -1829,7 +1868,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, ipart = lp_build_floor(bld, x); /* fpart = x - ipart */ - fpart = LLVMBuildSub(bld->builder, x, ipart, ""); + fpart = LLVMBuildFSub(bld->builder, x, ipart, ""); } if(p_exp2_int_part || p_exp2) { @@ -1844,7 +1883,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, Elements(lp_build_exp2_polynomial)); - res = LLVMBuildMul(bld->builder, expipart, expfpart, ""); + res = LLVMBuildFMul(bld->builder, expipart, expfpart, ""); } if(p_exp2_int_part) @@ -1957,9 +1996,9 @@ lp_build_log2_approx(struct lp_build_context *bld, Elements(lp_build_log2_polynomial)); /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); + logmant = LLVMBuildFMul(bld->builder, logmant, LLVMBuildFSub(bld->builder, mant, bld->one, ""), ""); - res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); + res = LLVMBuildFAdd(bld->builder, logmant, logexp, ""); } if(p_exp) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 77012f1fac..8b477313d4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -117,8 +117,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, scale = (double)mask/ubound; bias = (double)((unsigned long long)1 << (mantissa - n)); - res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), ""); - res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), ""); + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); if(dst_width > n) { @@ -175,6 +175,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, double scale; double bias; + assert(dst_type.floating); + mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); @@ -199,8 +201,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, res = LLVMBuildBitCast(builder, res, vec_type, ""); - res = LLVMBuildSub(builder, res, bias_, ""); - res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), ""); + res = LLVMBuildFSub(builder, res, bias_, ""); + res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); return res; } @@ -296,7 +298,7 @@ lp_build_conv(LLVMBuilderRef builder, if (dst_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) - tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); + tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } /* Use an equally sized integer for intermediate computations */ @@ -391,7 +393,7 @@ lp_build_conv(LLVMBuilderRef builder, if (src_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) - tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); + tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 0f01fc1d75..247cb83ce6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -240,7 +240,7 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, */ if (normalized) - scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), ""); + scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); else scaled = casted; @@ -322,7 +322,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } if (normalized) - scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); + scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); else scaled = unswizzled; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 9f405921b0..c724a4453e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -197,7 +197,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, if (format_desc->channel[chan].normalized) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); LLVMValueRef scale_val = lp_build_const_vec(type, scale); - input = LLVMBuildMul(builder, input, scale_val, ""); + input = LLVMBuildFMul(builder, input, scale_val, ""); } } else { @@ -227,7 +227,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); LLVMValueRef scale_val = lp_build_const_vec(type, scale); input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); - input = LLVMBuildMul(builder, input, scale_val, ""); + input = LLVMBuildFMul(builder, input, scale_val, ""); } else { /* FIXME */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index ca36046d22..7b1088939b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -85,7 +85,7 @@ lp_build_scalar_ddx(struct lp_build_context *bld, LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0); LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, ""); LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, ""); - return LLVMBuildSub(bld->builder, a_right, a_left, ""); + return lp_build_sub(bld, a_right, a_left); } @@ -97,5 +97,5 @@ lp_build_scalar_ddy(struct lp_build_context *bld, LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0); LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, ""); LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, ""); - return LLVMBuildSub(bld->builder, a_bottom, a_top, ""); + return lp_build_sub(bld, a_bottom, a_top); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 1a20d74cac..955d328953 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -888,17 +888,17 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, /* Compute rho = max of all partial derivatives scaled by texture size. * XXX this could be vectorized somewhat */ - rho = LLVMBuildMul(bld->builder, + rho = LLVMBuildFMul(bld->builder, lp_build_max(float_bld, dsdx, dsdy), lp_build_int_to_float(float_bld, width), ""); if (dims > 1) { LLVMValueRef max; - max = LLVMBuildMul(bld->builder, + max = LLVMBuildFMul(bld->builder, lp_build_max(float_bld, dtdx, dtdy), lp_build_int_to_float(float_bld, height), ""); rho = lp_build_max(float_bld, rho, max); if (dims > 2) { - max = LLVMBuildMul(bld->builder, + max = LLVMBuildFMul(bld->builder, lp_build_max(float_bld, drdx, drdy), lp_build_int_to_float(float_bld, depth), ""); rho = lp_build_max(float_bld, rho, max); @@ -912,12 +912,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, if (lod_bias) { lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, index0, ""); - lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); + lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); } } /* add sampler lod bias */ - lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); + lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); /* clamp lod */ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); @@ -2029,6 +2029,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, debug_printf("Sample from %s\n", util_format_name(fmt)); } + assert(type.floating); + /* Setup our build context */ memset(&bld, 0, sizeof bld); bld.builder = builder; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 42d796cb95..becbd3bece 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -506,6 +506,7 @@ emit_fetch( const unsigned chan_index ) { const struct tgsi_full_src_register *reg = &inst->Src[src_op]; + const struct lp_type type = bld->base.type; const unsigned swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); LLVMValueRef res; @@ -612,11 +613,12 @@ emit_fetch( case TGSI_UTIL_SIGN_SET: /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); - res = LLVMBuildNeg( bld->base.builder, res, "" ); - break; - + /* fall through */ case TGSI_UTIL_SIGN_TOGGLE: - res = LLVMBuildNeg( bld->base.builder, res, "" ); + if (type.floating) + res = LLVMBuildFNeg( bld->base.builder, res, "" ); + else + res = LLVMBuildNeg( bld->base.builder, res, "" ); break; case TGSI_UTIL_SIGN_KEEP: -- cgit v1.2.3 From 8a3a971743a90463e65b44f1769a5301a31ce4cd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Aug 2010 17:26:18 +0100 Subject: gallivm: Don't call LLVMBuildFNeg on llvm-2.6. It didn't exist yet. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 7 +++---- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 6 +----- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index d2dde41e9f..cecc1858bc 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -512,10 +512,7 @@ lp_build_mul_imm(struct lp_build_context *bld, return a; if(b == -1) - if (bld->type.floating) - return LLVMBuildFNeg(bld->builder, a, ""); - else - return LLVMBuildNeg(bld->builder, a, ""); + return lp_build_negate(bld, a); if(b == 2 && bld->type.floating) return lp_build_add(bld, a, a); @@ -748,9 +745,11 @@ LLVMValueRef lp_build_negate(struct lp_build_context *bld, LLVMValueRef a) { +#if HAVE_LLVM >= 0x0207 if (bld->type.floating) a = LLVMBuildFNeg(bld->builder, a, ""); else +#endif a = LLVMBuildNeg(bld->builder, a, ""); return a; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index becbd3bece..0aa64affac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -506,7 +506,6 @@ emit_fetch( const unsigned chan_index ) { const struct tgsi_full_src_register *reg = &inst->Src[src_op]; - const struct lp_type type = bld->base.type; const unsigned swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); LLVMValueRef res; @@ -615,10 +614,7 @@ emit_fetch( res = lp_build_abs( &bld->base, res ); /* fall through */ case TGSI_UTIL_SIGN_TOGGLE: - if (type.floating) - res = LLVMBuildFNeg( bld->base.builder, res, "" ); - else - res = LLVMBuildNeg( bld->base.builder, res, "" ); + res = lp_build_negate( &bld->base, res ); break; case TGSI_UTIL_SIGN_KEEP: -- cgit v1.2.3 From 6e1f9bc8f62baf3854a53bf67bb025790f2cb317 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Aug 2010 17:30:33 +0100 Subject: gallivm: More type checks. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index cecc1858bc..ec9b53be80 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -160,6 +160,8 @@ lp_build_comp(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + if(a == bld->one) return bld->zero; if(a == bld->zero) @@ -255,6 +257,8 @@ lp_build_sum_vector(struct lp_build_context *bld, LLVMValueRef index, res; unsigned i; + assert(lp_check_value(type, a)); + if (a == bld->zero) return bld->zero; if (a == bld->undef) @@ -505,6 +509,8 @@ lp_build_mul_imm(struct lp_build_context *bld, { LLVMValueRef factor; + assert(lp_check_value(bld->type, a)); + if(b == 0) return bld->zero; @@ -598,6 +604,10 @@ lp_build_lerp(struct lp_build_context *bld, LLVMValueRef delta; LLVMValueRef res; + assert(lp_check_value(bld->type, x)); + assert(lp_check_value(bld->type, v0)); + assert(lp_check_value(bld->type, v1)); + delta = lp_build_sub(bld, v1, v0); res = lp_build_mul(bld, x, delta); @@ -639,6 +649,9 @@ lp_build_min(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, b)); + if(a == bld->undef || b == bld->undef) return bld->undef; @@ -667,6 +680,9 @@ lp_build_max(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, b)); + if(a == bld->undef || b == bld->undef) return bld->undef; @@ -696,6 +712,10 @@ lp_build_clamp(struct lp_build_context *bld, LLVMValueRef min, LLVMValueRef max) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, min)); + assert(lp_check_value(bld->type, max)); + a = lp_build_min(bld, a, max); a = lp_build_max(bld, a, min); return a; @@ -712,6 +732,8 @@ lp_build_abs(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); + assert(lp_check_value(type, a)); + if(!type.sign) return a; @@ -745,6 +767,8 @@ LLVMValueRef lp_build_negate(struct lp_build_context *bld, LLVMValueRef a) { + assert(lp_check_value(bld->type, a)); + #if HAVE_LLVM >= 0x0207 if (bld->type.floating) a = LLVMBuildFNeg(bld->builder, a, ""); @@ -765,6 +789,8 @@ lp_build_sgn(struct lp_build_context *bld, LLVMValueRef cond; LLVMValueRef res; + assert(lp_check_value(type, a)); + /* Handle non-zero case */ if(!type.sign) { /* if not zero then sign must be positive */ @@ -822,6 +848,7 @@ lp_build_set_sign(struct lp_build_context *bld, ~((unsigned long long) 1 << (type.width - 1))); LLVMValueRef val, res; + assert(lp_check_value(type, a)); assert(type.floating); /* val = reinterpret_cast(a) */ @@ -1188,6 +1215,8 @@ lp_build_sqrt(struct lp_build_context *bld, LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; + assert(lp_check_value(type, a)); + /* TODO: optimize the constant case */ /* TODO: optimize the constant case */ @@ -1204,6 +1233,8 @@ lp_build_rcp(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + if(a == bld->zero) return bld->undef; if(a == bld->one) @@ -1258,6 +1289,8 @@ lp_build_rsqrt(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + assert(type.floating); if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) @@ -1745,6 +1778,8 @@ lp_build_exp(struct lp_build_context *bld, /* log2(e) = 1/log(2) */ LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634); + assert(lp_check_value(bld->type, x)); + return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); } @@ -1759,6 +1794,8 @@ lp_build_log(struct lp_build_context *bld, /* log(2) */ LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529); + assert(lp_check_value(bld->type, x)); + return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); } @@ -1781,6 +1818,8 @@ lp_build_polynomial(struct lp_build_context *bld, LLVMValueRef res = NULL; unsigned i; + assert(lp_check_value(bld->type, x)); + /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", @@ -1852,6 +1891,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef expfpart = NULL; LLVMValueRef res = NULL; + assert(lp_check_value(bld->type, x)); + if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) @@ -1965,6 +2006,8 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef logmant = NULL; LLVMValueRef res = NULL; + assert(lp_check_value(bld->type, x)); + if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) -- cgit v1.2.3 From a44a6960fab8c0053678fe74ce4c978ef40b06ff Mon Sep 17 00:00:00 2001 From: nobled Date: Mon, 9 Aug 2010 21:15:08 +0000 Subject: gallivm: Even more type checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit http://bugs.freedesktop.org/29407 Signed-off-by: José Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 12 +++++++++++- src/gallium/auxiliary/gallivm/lp_bld_logic.c | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index ec9b53be80..860fbd829b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -72,6 +72,9 @@ lp_build_min_simple(struct lp_build_context *bld, const char *intrinsic = NULL; LLVMValueRef cond; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + /* TODO: optimize the constant case */ if(type.width * type.length == 128) { @@ -118,6 +121,9 @@ lp_build_max_simple(struct lp_build_context *bld, const char *intrinsic = NULL; LLVMValueRef cond; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + /* TODO: optimize the constant case */ if(type.width * type.length == 128) { @@ -395,6 +401,10 @@ lp_build_mul_u8n(LLVMBuilderRef builder, LLVMValueRef c8; LLVMValueRef ab; + assert(!i16_type.floating); + assert(lp_check_value(i16_type, a)); + assert(lp_check_value(i16_type, b)); + c8 = lp_build_const_int_vec(i16_type, 8); #if 0 @@ -848,8 +858,8 @@ lp_build_set_sign(struct lp_build_context *bld, ~((unsigned long long) 1 << (type.width - 1))); LLVMValueRef val, res; - assert(lp_check_value(type, a)); assert(type.floating); + assert(lp_check_value(type, a)); /* val = reinterpret_cast(a) */ val = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index ab4ddb81c4..96f8e21fc6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -83,6 +83,8 @@ lp_build_compare(LLVMBuilderRef builder, assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); if(func == PIPE_FUNC_NEVER) return zeros; @@ -374,6 +376,9 @@ lp_build_select_bitwise(struct lp_build_context *bld, struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if (a == b) { return a; } @@ -419,6 +424,9 @@ lp_build_select(struct lp_build_context *bld, struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == b) return a; @@ -484,6 +492,9 @@ lp_build_select_aos(struct lp_build_context *bld, const unsigned n = type.length; unsigned i, j; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == b) return a; if(cond[0] && cond[1] && cond[2] && cond[3]) @@ -539,7 +550,11 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, b)); + b = LLVMBuildNot(bld->builder, b, ""); b = LLVMBuildAnd(bld->builder, a, b, ""); + return b; } -- cgit v1.2.3 From 20b3e40f166c77bd7fa5b7171e5b4169ed035280 Mon Sep 17 00:00:00 2001 From: nobled Date: Mon, 9 Aug 2010 21:25:18 +0000 Subject: gallivm: Fix bitwise operations for floats, division for integers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit http://bugs.freedesktop.org/29407 Signed-off-by: José Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 17 ++++++++++++++--- src/gallium/auxiliary/gallivm/lp_bld_logic.c | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 860fbd829b..cf2feeb163 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -588,13 +588,24 @@ lp_build_div(struct lp_build_context *bld, if(a == bld->undef || b == bld->undef) return bld->undef; - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstFDiv(a, b); + if(LLVMIsConstant(a) && LLVMIsConstant(b)) { + if (type.floating) + return LLVMConstFDiv(a, b); + else if (type.sign) + return LLVMConstSDiv(a, b); + else + return LLVMConstUDiv(a, b); + } if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_mul(bld, a, lp_build_rcp(bld, b)); - return LLVMBuildFDiv(bld->builder, a, b, ""); + if (type.floating) + return LLVMBuildFDiv(bld->builder, a, b, ""); + else if (type.sign) + return LLVMBuildSDiv(bld->builder, a, b, ""); + else + return LLVMBuildUDiv(bld->builder, a, b, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 96f8e21fc6..7d7db3b0d9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -550,11 +550,22 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - assert(lp_check_value(bld->type, a)); - assert(lp_check_value(bld->type, b)); + const struct lp_type type = bld->type; + + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + + /* can't do bitwise ops on floating-point values */ + if(type.floating) { + a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, ""); + } b = LLVMBuildNot(bld->builder, b, ""); b = LLVMBuildAnd(bld->builder, a, b, ""); + if(type.floating) { + b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, ""); + } return b; } -- cgit v1.2.3 From f263fdee8146719b14d9f9b14cf0c224461f35dc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 10 Aug 2010 08:56:20 -0600 Subject: gallivm: fix non-SSE4.1 case in lp_build_pack2() Since there's no SSE instruction for this case, fall through to the generic shuffle code. Fixes bug fd.o 29468. --- src/gallium/auxiliary/gallivm/lp_bld_pack.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 7748f8f099..ecfb13a0d4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -261,13 +261,14 @@ lp_build_pack2(LLVMBuilderRef builder, #endif LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); LLVMValueRef shuffle; - LLVMValueRef res; + LLVMValueRef res = NULL; assert(!src_type.floating); assert(!dst_type.floating); assert(src_type.width == dst_type.width * 2); assert(src_type.length * 2 == dst_type.length); + /* Check for special cases first */ if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { switch(src_type.width) { case 32: @@ -283,8 +284,8 @@ lp_build_pack2(LLVMBuilderRef builder, return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); } else { - assert(0); - return LLVMGetUndef(dst_vec_type); + /* use generic shuffle below */ + res = NULL; } } break; @@ -310,10 +311,13 @@ lp_build_pack2(LLVMBuilderRef builder, break; } - res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); - return res; + if (res) { + res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); + return res; + } } + /* generic shuffle */ lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); -- cgit v1.2.3 From 247b253d326819e9c62801d741b3740a27aeaf8a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 10 Aug 2010 16:32:29 +0100 Subject: util: Add util_format_srgb(). To convert RGB -> SRGB format. --- src/gallium/auxiliary/util/u_format.h | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 38254b1096..8e786a390a 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -630,6 +630,44 @@ util_format_has_alpha(enum pipe_format format) } } +/** + * Return the matching SRGB format, or PIPE_FORMAT_NONE if none. + */ +static INLINE enum pipe_format +util_format_srgb(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L8_UNORM: + return PIPE_FORMAT_L8_SRGB; + case PIPE_FORMAT_L8A8_UNORM: + return PIPE_FORMAT_L8A8_SRGB; + case PIPE_FORMAT_R8G8B8_UNORM: + return PIPE_FORMAT_R8G8B8_SRGB; + case PIPE_FORMAT_A8B8G8R8_UNORM: + return PIPE_FORMAT_A8B8G8R8_SRGB; + case PIPE_FORMAT_X8B8G8R8_UNORM: + return PIPE_FORMAT_X8B8G8R8_SRGB; + case PIPE_FORMAT_B8G8R8A8_UNORM: + return PIPE_FORMAT_B8G8R8A8_SRGB; + case PIPE_FORMAT_B8G8R8X8_UNORM: + return PIPE_FORMAT_B8G8R8X8_SRGB; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return PIPE_FORMAT_A8R8G8B8_SRGB; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return PIPE_FORMAT_X8R8G8B8_SRGB; + case PIPE_FORMAT_DXT1_RGB: + return PIPE_FORMAT_DXT1_SRGB; + case PIPE_FORMAT_DXT1_RGBA: + return PIPE_FORMAT_DXT1_SRGBA; + case PIPE_FORMAT_DXT3_RGBA: + return PIPE_FORMAT_DXT3_SRGBA; + case PIPE_FORMAT_DXT5_RGBA: + return PIPE_FORMAT_DXT5_SRGBA; + default: + return PIPE_FORMAT_NONE; + } +} + /** * Return the number of components stored. * Formats with block size != 1x1 will always have 1 component (the block). -- cgit v1.2.3 From 0dcf0f9dfaa23b08d2bc20f8cbd02550c2632e52 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 10:46:12 +0200 Subject: auxiliary: move Ben Skeggs' primitive splitter to common code This is a simple framework that handles splitting primitives in an abstract way. The user has to specify the primitive start, start index and count. Then, it can ask the primitive splitter to "draw" a chunk of the primitive, staying under a given vertex/index budget. The primitive splitter will then call user-supplied functions to emit a range of vertices/indices, as well as switch the edgeflag on or off. This is particularly useful for hardware that either has limits on the vertex count field, or where vertices are pushed on a FIFO or temporary buffer of limited size. Note that unlike other splitters, it does not manipulate data in any way, and merely asks a callback to do so, in vertex intervals. --- src/gallium/auxiliary/util/u_split_prim.h | 102 +++++++++++++++++++++++++++++ src/gallium/drivers/nouveau/nouveau_util.h | 100 ---------------------------- src/gallium/drivers/nv50/nv50_push.c | 2 +- src/gallium/drivers/nv50/nv50_vbo.c | 2 +- 4 files changed, 104 insertions(+), 102 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_split_prim.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h new file mode 100644 index 0000000000..3c438da0ba --- /dev/null +++ b/src/gallium/auxiliary/util/u_split_prim.h @@ -0,0 +1,102 @@ +/* Originally written by Ben Skeggs for the nv50 driver*/ +#include + +struct u_split_prim { + void *priv; + void (*emit)(void *priv, unsigned start, unsigned count); + void (*edge)(void *priv, boolean enabled); + + unsigned mode; + unsigned start; + unsigned p_start; + unsigned p_end; + + uint repeat_first:1; + uint close_first:1; + uint edgeflag_off:1; +}; + +static INLINE void +u_split_prim_init(struct u_split_prim *s, + unsigned mode, unsigned start, unsigned count) +{ + if (mode == PIPE_PRIM_LINE_LOOP) { + s->mode = PIPE_PRIM_LINE_STRIP; + s->close_first = 1; + } else { + s->mode = mode; + s->close_first = 0; + } + s->start = start; + s->p_start = start; + s->p_end = start + count; + s->edgeflag_off = 0; + s->repeat_first = 0; +} + +static INLINE boolean +u_split_prim_next(struct u_split_prim *s, unsigned max_verts) +{ + int repeat = 0; + + if (s->repeat_first) { + s->emit(s->priv, s->start, 1); + max_verts--; + if (s->edgeflag_off) { + s->edge(s->priv, TRUE); + s->edgeflag_off = FALSE; + } + } + + if (s->p_start + s->close_first + max_verts >= s->p_end) { + s->emit(s->priv, s->p_start, s->p_end - s->p_start); + if (s->close_first) + s->emit(s->priv, s->start, 1); + return TRUE; + } + + switch (s->mode) { + case PIPE_PRIM_LINES: + max_verts &= ~1; + break; + case PIPE_PRIM_LINE_STRIP: + repeat = 1; + break; + case PIPE_PRIM_POLYGON: + max_verts--; + s->emit(s->priv, s->p_start, max_verts); + s->edge(s->priv, FALSE); + s->emit(s->priv, s->p_start + max_verts, 1); + s->p_start += max_verts; + s->repeat_first = TRUE; + s->edgeflag_off = TRUE; + return FALSE; + case PIPE_PRIM_TRIANGLES: + max_verts = max_verts - (max_verts % 3); + break; + case PIPE_PRIM_TRIANGLE_STRIP: + /* to ensure winding stays correct, always split + * on an even number of generated triangles + */ + max_verts = max_verts & ~1; + repeat = 2; + break; + case PIPE_PRIM_TRIANGLE_FAN: + s->repeat_first = TRUE; + repeat = 1; + break; + case PIPE_PRIM_QUADS: + max_verts &= ~3; + break; + case PIPE_PRIM_QUAD_STRIP: + max_verts &= ~1; + repeat = 2; + break; + default: + break; + } + + s->emit (s->priv, s->p_start, max_verts); + s->p_start += (max_verts - repeat); + return FALSE; +} diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h index a5e8537533..b165f7a611 100644 --- a/src/gallium/drivers/nouveau/nouveau_util.h +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -88,104 +88,4 @@ static INLINE unsigned log2i(unsigned i) return r; } -struct u_split_prim { - void *priv; - void (*emit)(void *priv, unsigned start, unsigned count); - void (*edge)(void *priv, boolean enabled); - - unsigned mode; - unsigned start; - unsigned p_start; - unsigned p_end; - - uint repeat_first:1; - uint close_first:1; - uint edgeflag_off:1; -}; - -static INLINE void -u_split_prim_init(struct u_split_prim *s, - unsigned mode, unsigned start, unsigned count) -{ - if (mode == PIPE_PRIM_LINE_LOOP) { - s->mode = PIPE_PRIM_LINE_STRIP; - s->close_first = 1; - } else { - s->mode = mode; - s->close_first = 0; - } - s->start = start; - s->p_start = start; - s->p_end = start + count; - s->edgeflag_off = 0; - s->repeat_first = 0; -} - -static INLINE boolean -u_split_prim_next(struct u_split_prim *s, unsigned max_verts) -{ - int repeat = 0; - - if (s->repeat_first) { - s->emit(s->priv, s->start, 1); - max_verts--; - if (s->edgeflag_off) { - s->edge(s->priv, TRUE); - s->edgeflag_off = FALSE; - } - } - - if (s->p_start + s->close_first + max_verts >= s->p_end) { - s->emit(s->priv, s->p_start, s->p_end - s->p_start); - if (s->close_first) - s->emit(s->priv, s->start, 1); - return TRUE; - } - - switch (s->mode) { - case PIPE_PRIM_LINES: - max_verts &= ~1; - break; - case PIPE_PRIM_LINE_STRIP: - repeat = 1; - break; - case PIPE_PRIM_POLYGON: - max_verts--; - s->emit(s->priv, s->p_start, max_verts); - s->edge(s->priv, FALSE); - s->emit(s->priv, s->p_start + max_verts, 1); - s->p_start += max_verts; - s->repeat_first = TRUE; - s->edgeflag_off = TRUE; - return FALSE; - case PIPE_PRIM_TRIANGLES: - max_verts = max_verts - (max_verts % 3); - break; - case PIPE_PRIM_TRIANGLE_STRIP: - /* to ensure winding stays correct, always split - * on an even number of generated triangles - */ - max_verts = max_verts & ~1; - repeat = 2; - break; - case PIPE_PRIM_TRIANGLE_FAN: - s->repeat_first = TRUE; - repeat = 1; - break; - case PIPE_PRIM_QUADS: - max_verts &= ~3; - break; - case PIPE_PRIM_QUAD_STRIP: - max_verts &= ~1; - repeat = 2; - break; - default: - break; - } - - s->emit (s->priv, s->p_start, max_verts); - s->p_start += (max_verts - repeat); - return FALSE; -} - #endif diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index c3ac804146..ee87144dcf 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -2,8 +2,8 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_split_prim.h" -#include "nouveau/nouveau_util.h" #include "nv50_context.h" #include "nv50_resource.h" diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index e7f8fe33ed..0937668b84 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -24,8 +24,8 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_split_prim.h" -#include "nouveau/nouveau_util.h" #include "nv50_context.h" #include "nv50_resource.h" -- cgit v1.2.3 From eee5cea385b6871fa934a7882b2f214e3cbace8b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 10:51:28 +0200 Subject: auxiliary: fix u_split_prim naming convention Current practice is to start identifiers with "util_" instead of "u_". --- src/gallium/auxiliary/util/u_split_prim.h | 6 +++--- src/gallium/drivers/nv50/nv50_push.c | 6 +++--- src/gallium/drivers/nv50/nv50_vbo.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h index 3c438da0ba..e526a73fc3 100644 --- a/src/gallium/auxiliary/util/u_split_prim.h +++ b/src/gallium/auxiliary/util/u_split_prim.h @@ -1,7 +1,7 @@ /* Originally written by Ben Skeggs for the nv50 driver*/ #include -struct u_split_prim { +struct util_split_prim { void *priv; void (*emit)(void *priv, unsigned start, unsigned count); void (*edge)(void *priv, boolean enabled); @@ -17,7 +17,7 @@ struct u_split_prim { }; static INLINE void -u_split_prim_init(struct u_split_prim *s, +util_split_prim_init(struct util_split_prim *s, unsigned mode, unsigned start, unsigned count) { if (mode == PIPE_PRIM_LINE_LOOP) { @@ -35,7 +35,7 @@ u_split_prim_init(struct u_split_prim *s, } static INLINE boolean -u_split_prim_next(struct u_split_prim *s, unsigned max_verts) +util_split_prim_next(struct util_split_prim *s, unsigned max_verts) { int repeat = 0; diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index ee87144dcf..6a2ffd5a3c 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -217,7 +217,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe, 4; /* potential edgeflag enable/disable */ const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */ 2; /* potential edgeflag modification */ - struct u_split_prim s; + struct util_split_prim s; unsigned vtx_size; boolean nzi = FALSE; int i; @@ -335,7 +335,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe, ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride; } - u_split_prim_init(&s, mode, start, count); + util_split_prim_init(&s, mode, start, count); do { if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) { FIRE_RING(chan); @@ -351,7 +351,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0)); - done = u_split_prim_next(&s, max_verts); + done = util_split_prim_next(&s, max_verts); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); } while (!done); diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 0937668b84..1f11950199 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -311,7 +311,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, struct pipe_transfer *transfer; struct instance a[16]; struct inline_ctx ctx; - struct u_split_prim s; + struct util_split_prim s; boolean nzi = FALSE; unsigned overhead; @@ -347,7 +347,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, unsigned max_verts; boolean done; - u_split_prim_init(&s, mode, start, count); + util_split_prim_init(&s, mode, start, count); do { if (AVAIL_RING(chan) < (overhead + 6)) { FIRE_RING(chan); @@ -366,7 +366,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0)); - done = u_split_prim_next(&s, max_verts); + done = util_split_prim_next(&s, max_verts); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); } while (!done); -- cgit v1.2.3 From 58b104d7f0890434aadbdebcd6002ba0a0e132ec Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 10:54:31 +0200 Subject: auxiliary: make primitive splitter assert on unimplemented adjacency prims They are unimplemented, even though the framework makes it possible to implement them well, and nv50 needs them. --- src/gallium/auxiliary/util/u_split_prim.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h index e526a73fc3..206e1ec311 100644 --- a/src/gallium/auxiliary/util/u_split_prim.h +++ b/src/gallium/auxiliary/util/u_split_prim.h @@ -92,8 +92,11 @@ util_split_prim_next(struct util_split_prim *s, unsigned max_verts) max_verts &= ~1; repeat = 2; break; - default: + case PIPE_PRIM_POINTS: break; + default: + /* TODO: implement adjacency primitives */ + assert(0); } s->emit (s->priv, s->p_start, max_verts); -- cgit v1.2.3 From 24f5ebb1d735ca7e8944b346359de5941e707047 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 6 Aug 2010 09:53:15 +0200 Subject: u_surfaces: fix surface leak due to off by one --- src/gallium/auxiliary/util/u_surfaces.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c index b5d21570d5..f6e5801ef6 100644 --- a/src/gallium/auxiliary/util/u_surfaces.c +++ b/src/gallium/auxiliary/util/u_surfaces.c @@ -99,7 +99,7 @@ util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void ( if(us->u.array) { unsigned i; - for(i = 0; i < pt->last_level; ++i) + for(i = 0; i <= pt->last_level; ++i) { struct pipe_surface *ps = us->u.array[i]; if(ps) -- cgit v1.2.3 From e45b2ce2c3a52e9f8e6fb7e933ab2f95eec15be1 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 6 Aug 2010 09:46:14 +0200 Subject: u_surfaces: use cso_hash instead of util_hash_table Using cso_hash directly is the right thing since util_hash_table adds useless overhead and is harder to use for this application. --- src/gallium/auxiliary/util/u_surfaces.c | 79 ++++++++++++--------------------- src/gallium/auxiliary/util/u_surfaces.h | 5 +-- 2 files changed, 31 insertions(+), 53 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c index f6e5801ef6..7733ad24d0 100644 --- a/src/gallium/auxiliary/util/u_surfaces.c +++ b/src/gallium/auxiliary/util/u_surfaces.c @@ -3,40 +3,22 @@ #include "util/u_inlines.h" #include "util/u_memory.h" -/* TODO: ouch, util_hash_table should do these by default when passed a null function pointer - * this indirect function call is quite bad - */ -static unsigned -hash(void *key) -{ - return (unsigned)(uintptr_t)key; -} - -static int -compare(void *key1, void *key2) -{ - return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2; -} - struct pipe_surface * util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { struct pipe_surface *ps; - void *key = NULL; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - if(!us->u.table) - us->u.table = util_hash_table_create(hash, compare); - key = (void *)(uintptr_t)(((zslice + face) << 8) | level); - /* TODO: ouch, should have a get-reference function... - * also, shouldn't allocate a two-pointer structure for each item... */ - ps = util_hash_table_get(us->u.table, key); + { /* or 2D array */ + if(!us->u.hash) + us->u.hash = cso_hash_create(); + + ps = cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level)); } else { if(!us->u.array) - us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *)); + us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *)); ps = us->u.array[level]; } @@ -54,7 +36,7 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, str ps->offset = ~0; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - util_hash_table_set(us->u.table, key, ps); + cso_hash_insert(us->u.hash, ((zslice + face) << 8) | level, ps); else us->u.array[level] = ps; @@ -66,47 +48,44 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps) { struct pipe_resource *pt = ps->texture; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level); - util_hash_table_remove(us->u.table, key); + { /* or 2D array */ + cso_hash_erase(us->u.hash, cso_hash_find(us->u.hash, ((ps->zslice + ps->face) << 8) | ps->level)); } else us->u.array[ps->level] = 0; } -static enum pipe_error -util_surfaces_destroy_callback(void *key, void *value, void *data) -{ - void (*destroy_surface) (struct pipe_surface * ps) = data; - destroy_surface((struct pipe_surface *)value); - return PIPE_OK; -} - void util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *)) { if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - if(us->u.table) + { /* or 2D array */ + if(us->u.hash) { - util_hash_table_foreach(us->u.table, util_surfaces_destroy_callback, destroy_surface); - util_hash_table_destroy(us->u.table); - us->u.table = NULL; + struct cso_hash_iter iter; + iter = cso_hash_first_node(us->u.hash); + while (!cso_hash_iter_is_null(iter)) { + destroy_surface(cso_hash_iter_data(iter)); + iter = cso_hash_iter_next(iter); + } + + cso_hash_delete(us->u.hash); + us->u.hash = NULL; } } else { if(us->u.array) { - unsigned i; - for(i = 0; i <= pt->last_level; ++i) - { - struct pipe_surface *ps = us->u.array[i]; - if(ps) - destroy_surface(ps); - } - FREE(us->u.array); - us->u.array = NULL; + unsigned i; + for(i = 0; i <= pt->last_level; ++i) + { + struct pipe_surface *ps = us->u.array[i]; + if(ps) + destroy_surface(ps); + } + FREE(us->u.array); + us->u.array = NULL; } } } diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h index 0195bf5afb..79ecd31bd7 100644 --- a/src/gallium/auxiliary/util/u_surfaces.h +++ b/src/gallium/auxiliary/util/u_surfaces.h @@ -4,14 +4,13 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "util/u_atomic.h" - -struct util_hash_table; +#include "cso_cache/cso_hash.h" struct util_surfaces { union { - struct util_hash_table *table; + struct cso_hash *hash; struct pipe_surface **array; } u; }; -- cgit v1.2.3 From 5668526c915a1e31036386d117c536592dae6859 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Tue, 3 Aug 2010 22:19:30 +0200 Subject: u_surfaces: add util_surfaces_peek Used to find out if a surface exists without creating one. --- src/gallium/auxiliary/util/u_surfaces.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h index 79ecd31bd7..af978c7057 100644 --- a/src/gallium/auxiliary/util/u_surfaces.h +++ b/src/gallium/auxiliary/util/u_surfaces.h @@ -12,6 +12,7 @@ struct util_surfaces { struct cso_hash *hash; struct pipe_surface **array; + void* pv; } u; }; @@ -34,6 +35,18 @@ util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct return util_surfaces_do_get(us, surface_struct_size, pscreen, pt, face, level, zslice, flags); } +static INLINE struct pipe_surface * +util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice) +{ + if(!us->u.pv) + return 0; + + if(unlikely(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)) + return cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level)); + else + return us->u.array[level]; +} + void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps); static INLINE void -- cgit v1.2.3 From b85c71d4e1e4ed788be834dff5b7b3c0cd0402ac Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Tue, 3 Aug 2010 21:20:53 +0200 Subject: auxiliary: support for transfers using staging resources Direct3D 10/11 has no concept of transfers. Applications instead create resources with a STAGING or DYNAMIC usage, copy between them and the real resource and use Map to map the STAGING/DYNAMIC resource. This util module allows to implement Gallium drivers as a Direct3D driver would be implemented: transfers allocate a resource with PIPE_USAGE_STAGING, and copy the data between it and the real resource with resource_copy_region. --- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/util/u_staging.c | 93 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_staging.h | 29 +++++++++++ 3 files changed, 123 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_staging.c create mode 100644 src/gallium/auxiliary/util/u_staging.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 843b72bc38..9544e90a96 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -131,6 +131,7 @@ C_SOURCES = \ util/u_sampler.c \ util/u_simple_shaders.c \ util/u_snprintf.c \ + util/u_staging.c \ util/u_surface.c \ util/u_surfaces.c \ util/u_texture.c \ diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c new file mode 100644 index 0000000000..4853aaf6d9 --- /dev/null +++ b/src/gallium/auxiliary/util/u_staging.c @@ -0,0 +1,93 @@ +#include "util/u_staging.h" +#include "pipe/p_context.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" + +static void +util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template) +{ + memset(template, 0, sizeof(struct pipe_resource)); + if(pt->target != PIPE_BUFFER && depth <= 1) + template->target = PIPE_TEXTURE_2D; + else + template->target = pt->target; + template->format = pt->format; + template->width0 = width; + template->height0 = height; + template->depth0 = depth; + template->last_level = 0; + template->nr_samples = pt->nr_samples; + template->bind = 0; + template->usage = PIPE_USAGE_STAGING; + template->flags = 0; +} + +inline struct util_staging_transfer * +util_staging_transfer_new(struct pipe_context *pipe, + struct pipe_resource *pt, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + bool direct) +{ + struct pipe_screen *pscreen = pipe->screen; + struct util_staging_transfer *tx; + struct pipe_resource staging_resource_template; + + tx = CALLOC_STRUCT(util_staging_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->base.resource, pt); + tx->base.sr = sr; + tx->base.usage = usage; + tx->base.box = *box; + + if (direct) + { + tx->staging_resource = pt; + return tx; + } + + util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template); + tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template); + if (!tx->staging_resource) + { + pipe_resource_reference(&tx->base.resource, NULL); + FREE(tx); + return NULL; + } + + if (usage & PIPE_TRANSFER_READ) + { + struct pipe_subresource dstsr; + dstsr.face = 0; + dstsr.level = 0; + for(unsigned zi = 0; zi < box->depth; ++zi) + pipe->resource_copy_region(pipe, tx->staging_resource, dstsr, 0, 0, 0, tx->base.resource, sr, box->x, box->y, box->z + zi, box->width, box->height); + } + + return tx; +} + +void +util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx) +{ + struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx; + + if (tx->staging_resource != tx->base.resource) + { + if(tx->base.usage & PIPE_TRANSFER_WRITE) { + struct pipe_subresource srcsr; + srcsr.face = 0; + srcsr.level = 0; + for(unsigned zi = 0; zi < tx->base.box.depth; ++zi) + pipe->resource_copy_region(pipe, tx->base.resource, tx->base.sr, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, tx->staging_resource, srcsr, 0, 0, 0, tx->base.box.width, tx->base.box.height); + } + + pipe_resource_reference(&tx->staging_resource, NULL); + } + + pipe_resource_reference(&ptx->resource, NULL); + FREE(ptx); +} diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h new file mode 100644 index 0000000000..f5976dadb1 --- /dev/null +++ b/src/gallium/auxiliary/util/u_staging.h @@ -0,0 +1,29 @@ +/* Implement transfers using staging resources like in DirectX 10/11 */ + +#ifndef U_STAGING_H +#define U_STAGING_H + +#include "pipe/p_state.h" + +struct util_staging_transfer { + struct pipe_transfer base; + + /* if direct, same as base.resource, otherwise the temporary staging resource */ + struct pipe_resource *staging_resource; +}; + +/* user must be stride, slice_stride and offset */ +/* pt->usage == PIPE_USAGE_DYNAMIC should be a good value to pass for direct */ +/* staging resource is currently created with PIPE_USAGE_DYNAMIC */ +struct util_staging_transfer * +util_staging_transfer_new(struct pipe_context *pipe, + struct pipe_resource *pt, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + bool direct); + +void +util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx); + +#endif -- cgit v1.2.3 From 4b20ad7559271a7785193094a3f110ef78e65253 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 11 Aug 2010 15:11:37 +0200 Subject: util: copy the u_staging commit message to the code --- src/gallium/auxiliary/util/u_staging.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h index f5976dadb1..602faa2971 100644 --- a/src/gallium/auxiliary/util/u_staging.h +++ b/src/gallium/auxiliary/util/u_staging.h @@ -1,4 +1,12 @@ -/* Implement transfers using staging resources like in DirectX 10/11 */ +/* Direct3D 10/11 has no concept of transfers. Applications instead + * create resources with a STAGING or DYNAMIC usage, copy between them + * and the real resource and use Map to map the STAGING/DYNAMIC resource. + * + * This util module allows to implement Gallium drivers as a Direct3D + * driver would be implemented: transfers allocate a resource with + * PIPE_USAGE_STAGING, and copy the data between it and the real resource + * with resource_copy_region. + */ #ifndef U_STAGING_H #define U_STAGING_H -- cgit v1.2.3 From f8533482f4a9b5ee7107f4e653d4ebf99ac63e2e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 15:11:12 +0100 Subject: gallivm: Use unsigned shift in lp_build_minify. Texture dimensions are unsigned. --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 955d328953..665b010ece 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -811,7 +811,7 @@ lp_build_minify(struct lp_build_sample_context *bld, LLVMValueRef base_size, LLVMValueRef level) { - LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify"); + LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); return size; } -- cgit v1.2.3 From 10ce6779e8a64c33add70e440f885c210f3fa6ee Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 15:13:17 +0100 Subject: gallivm: Use lp_build_div instead of lp_build_mul + lp_build_rcp. Single divide, so let lp_build_div decide how to implement this. This will save a multiplication in architectures which don't have a RCP intrinsic. --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 665b010ece..307506507d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1219,8 +1219,7 @@ lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) /* ima = -0.5 / abs(coord); */ LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); - LLVMValueRef ima = lp_build_mul(coord_bld, negHalf, - lp_build_rcp(coord_bld, absCoord)); + LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); return ima; } -- cgit v1.2.3 From deb809ec98664257ed215e75554e1a80c149c851 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 6 Aug 2010 11:08:45 +0200 Subject: auxiliary: fix util_framebuffer_copy util_framebuffer_copy was attempting to copy all elements of the source framebuffer state. However, this breaks if the user does not zero initialize the structure. Instead, only copy the elements up to nr_cbufs, and clear elements up to dst->nr_cbufs, if the destination was larger than the source. --- src/gallium/auxiliary/util/u_framebuffer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_framebuffer.c b/src/gallium/auxiliary/util/u_framebuffer.c index 768ae9ceb5..7803ec6a8b 100644 --- a/src/gallium/auxiliary/util/u_framebuffer.c +++ b/src/gallium/auxiliary/util/u_framebuffer.c @@ -85,9 +85,11 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, dst->width = src->width; dst->height = src->height; - for (i = 0; i < Elements(src->cbufs); i++) { + for (i = 0; i < src->nr_cbufs; i++) pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); - } + + for (i = src->nr_cbufs; i < dst->nr_cbufs; i++) + pipe_surface_reference(&dst->cbufs[i], NULL); dst->nr_cbufs = src->nr_cbufs; -- cgit v1.2.3 From 16b45ca7cefb3432b4133fe9d0b1dbfe3f286131 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 15:59:43 +0200 Subject: translate_generic: return NULL instead of assert(0) if format not supported This gives the caller a chance to recover (or crash anyway otherwise). --- src/gallium/auxiliary/translate/translate_generic.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 4d1977229e..809a4e47f4 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -201,12 +201,6 @@ emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) out[3] = TO_8_UNORM(attrib[3]); } -static void -emit_NULL( const float *attrib, void *ptr ) -{ - /* do nothing is the only sensible option */ -} - static emit_func get_emit_func( enum pipe_format format ) { switch (format) { @@ -343,8 +337,7 @@ static emit_func get_emit_func( enum pipe_format format ) return &emit_A8R8G8B8_UNORM; default: - assert(0); - return &emit_NULL; + return NULL; } } @@ -539,8 +532,12 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); + if(!tg->attrib[i].emit) + { + FREE(tg); + return NULL; + } tg->attrib[i].output_offset = key->element[i].output_offset; - } tg->nr_attrib = key->nr_elements; -- cgit v1.2.3 From 684aeb366fb72a2dbf9dd7a0803f29880858cd06 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 16:00:32 +0200 Subject: translate_generic: fix broken A8R8G8B8_UNORM output translate was attempting to output A8R8G8B8_UNORM as if it were R8G8B8A8_UNORM. Now the tests just added pass. --- src/gallium/auxiliary/translate/translate_generic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 809a4e47f4..021099ae98 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -187,9 +187,15 @@ ATTRIB( R8G8B8_SNORM, 3, char, TO_8_SNORM ) ATTRIB( R8G8_SNORM, 2, char, TO_8_SNORM ) ATTRIB( R8_SNORM, 1, char, TO_8_SNORM ) -ATTRIB( A8R8G8B8_UNORM, 4, ubyte, TO_8_UNORM ) -/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, TO_8_UNORM )*/ - +static void +emit_A8R8G8B8_UNORM( const float *attrib, void *ptr) +{ + ubyte *out = (ubyte *)ptr; + out[0] = TO_8_UNORM(attrib[3]); + out[1] = TO_8_UNORM(attrib[0]); + out[2] = TO_8_UNORM(attrib[1]); + out[3] = TO_8_UNORM(attrib[2]); +} static void emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) -- cgit v1.2.3 From 5266bc3c60033ccc3515b3a6d02f7219f981cbb1 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 16:59:25 +0200 Subject: Revert "translate_generic: return NULL instead of assert(0) if format not supported" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 16b45ca7cefb3432b4133fe9d0b1dbfe3f286131. José Fonseca asked for a revert. Note that the testsuite will now segfault since it attempts to test all possible formats. --- src/gallium/auxiliary/translate/translate_generic.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 021099ae98..0baa49a192 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -207,6 +207,12 @@ emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) out[3] = TO_8_UNORM(attrib[3]); } +static void +emit_NULL( const float *attrib, void *ptr ) +{ + /* do nothing is the only sensible option */ +} + static emit_func get_emit_func( enum pipe_format format ) { switch (format) { @@ -343,7 +349,8 @@ static emit_func get_emit_func( enum pipe_format format ) return &emit_A8R8G8B8_UNORM; default: - return NULL; + assert(0); + return &emit_NULL; } } @@ -538,12 +545,8 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); - if(!tg->attrib[i].emit) - { - FREE(tg); - return NULL; - } tg->attrib[i].output_offset = key->element[i].output_offset; + } tg->nr_attrib = key->nr_elements; -- cgit v1.2.3 From b481a1237e00e1e1fb68ffca0653df3a96f21788 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 18:44:17 +0100 Subject: gallivm: Fix and enable the extra Newton/Raphson step in lp_build_rcp(). Thanks to Michal for spotting this. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index cf2feeb163..816ee70119 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1274,7 +1274,7 @@ lp_build_rcp(struct lp_build_context *bld, * when we have a better system in place to track minimum precision. */ -#if 0 +#if 1 /* * Do one Newton-Raphson step to improve precision: * @@ -1291,7 +1291,7 @@ lp_build_rcp(struct lp_build_context *bld, res = LLVMBuildFSub(bld->builder, two, res, ""); res = LLVMBuildFMul(bld->builder, res, rcp_a, ""); - return rcp_a; + return res; #else return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); #endif -- cgit v1.2.3 From 04cfc6234c9bf8c82343e39e0aa9def157e4091d Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 11 Aug 2010 12:04:54 -0700 Subject: auxiliary: Add u_staging.c to SCons build. This is a follow-up to commit b85c71d4e1e4ed788be834dff5b7b3c0cd0402ac which added u_staging.c to make. --- src/gallium/auxiliary/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 1f09198721..3124e20ce8 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -180,6 +180,7 @@ source = [ 'util/u_sampler.c', 'util/u_simple_shaders.c', 'util/u_snprintf.c', + 'util/u_staging.c', 'util/u_surface.c', 'util/u_surfaces.c', 'util/u_texture.c', -- cgit v1.2.3 From 27fe2347bc2ddf88cb666a95adfb0b12a39d42b9 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 11 Aug 2010 12:45:02 -0700 Subject: auxiliary: Make u_staging.c MSVC compatible. Fixes MSVC build. --- src/gallium/auxiliary/util/u_staging.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c index 4853aaf6d9..4dff8a0b5a 100644 --- a/src/gallium/auxiliary/util/u_staging.c +++ b/src/gallium/auxiliary/util/u_staging.c @@ -22,7 +22,7 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne template->flags = 0; } -inline struct util_staging_transfer * +INLINE struct util_staging_transfer * util_staging_transfer_new(struct pipe_context *pipe, struct pipe_resource *pt, struct pipe_subresource sr, @@ -61,9 +61,10 @@ util_staging_transfer_new(struct pipe_context *pipe, if (usage & PIPE_TRANSFER_READ) { struct pipe_subresource dstsr; + unsigned zi; dstsr.face = 0; dstsr.level = 0; - for(unsigned zi = 0; zi < box->depth; ++zi) + for(zi = 0; zi < box->depth; ++zi) pipe->resource_copy_region(pipe, tx->staging_resource, dstsr, 0, 0, 0, tx->base.resource, sr, box->x, box->y, box->z + zi, box->width, box->height); } @@ -79,9 +80,10 @@ util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *p { if(tx->base.usage & PIPE_TRANSFER_WRITE) { struct pipe_subresource srcsr; + unsigned zi; srcsr.face = 0; srcsr.level = 0; - for(unsigned zi = 0; zi < tx->base.box.depth; ++zi) + for(zi = 0; zi < tx->base.box.depth; ++zi) pipe->resource_copy_region(pipe, tx->base.resource, tx->base.sr, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, tx->staging_resource, srcsr, 0, 0, 0, tx->base.box.width, tx->base.box.height); } -- cgit v1.2.3 From 10adb7840c3a37dedc940fe593b246336eebd71e Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 21:19:12 +0200 Subject: translate: allow clients to ask for supported output formats Currently translate asserts on unsupported output formats, making it impossible to use for some purposes, such as testing whether it actually works on all formats it supports. Removing the assert was met with opposition, so this change allows clients to ask whether an output format is supported, and they are thus able to avoid attempting to use it. Since this is just an addition to the API, no adverse effect is possible, and it makes the testsuite work again. --- src/gallium/auxiliary/translate/translate.c | 5 ++ src/gallium/auxiliary/translate/translate.h | 3 + .../auxiliary/translate/translate_generic.c | 80 ++++++++++++++++++++++ 3 files changed, 88 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index a9b7253bf4..fe638e211f 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -48,3 +48,8 @@ struct translate *translate_create( const struct translate_key *key ) return translate_generic_create( key ); } + +boolean translate_is_output_format_supported(enum pipe_format format) +{ + return translate_generic_is_output_format_supported(format); +} diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index edd95e0788..eb6f2cc486 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -105,6 +105,8 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx, struct translate *translate_create( const struct translate_key *key ); +boolean translate_is_output_format_supported(enum pipe_format format); + static INLINE int translate_keysize( const struct translate_key *key ) { return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element); @@ -138,5 +140,6 @@ struct translate *translate_sse2_create( const struct translate_key *key ); struct translate *translate_generic_create( const struct translate_key *key ); +boolean translate_generic_is_output_format_supported(enum pipe_format format); #endif diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 0baa49a192..42cfd763e9 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -554,3 +554,83 @@ struct translate *translate_generic_create( const struct translate_key *key ) return &tg->translate; } + +boolean translate_generic_is_output_format_supported(enum pipe_format format) +{ + switch(format) + { + case PIPE_FORMAT_R64G64B64A64_FLOAT: return TRUE; + case PIPE_FORMAT_R64G64B64_FLOAT: return TRUE; + case PIPE_FORMAT_R64G64_FLOAT: return TRUE; + case PIPE_FORMAT_R64_FLOAT: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_FLOAT: return TRUE; + case PIPE_FORMAT_R32G32B32_FLOAT: return TRUE; + case PIPE_FORMAT_R32G32_FLOAT: return TRUE; + case PIPE_FORMAT_R32_FLOAT: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_USCALED: return TRUE; + case PIPE_FORMAT_R32G32B32_USCALED: return TRUE; + case PIPE_FORMAT_R32G32_USCALED: return TRUE; + case PIPE_FORMAT_R32_USCALED: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_SSCALED: return TRUE; + case PIPE_FORMAT_R32G32B32_SSCALED: return TRUE; + case PIPE_FORMAT_R32G32_SSCALED: return TRUE; + case PIPE_FORMAT_R32_SSCALED: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_UNORM: return TRUE; + case PIPE_FORMAT_R32G32B32_UNORM: return TRUE; + case PIPE_FORMAT_R32G32_UNORM: return TRUE; + case PIPE_FORMAT_R32_UNORM: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_SNORM: return TRUE; + case PIPE_FORMAT_R32G32B32_SNORM: return TRUE; + case PIPE_FORMAT_R32G32_SNORM: return TRUE; + case PIPE_FORMAT_R32_SNORM: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_USCALED: return TRUE; + case PIPE_FORMAT_R16G16B16_USCALED: return TRUE; + case PIPE_FORMAT_R16G16_USCALED: return TRUE; + case PIPE_FORMAT_R16_USCALED: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_SSCALED: return TRUE; + case PIPE_FORMAT_R16G16B16_SSCALED: return TRUE; + case PIPE_FORMAT_R16G16_SSCALED: return TRUE; + case PIPE_FORMAT_R16_SSCALED: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_UNORM: return TRUE; + case PIPE_FORMAT_R16G16B16_UNORM: return TRUE; + case PIPE_FORMAT_R16G16_UNORM: return TRUE; + case PIPE_FORMAT_R16_UNORM: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_SNORM: return TRUE; + case PIPE_FORMAT_R16G16B16_SNORM: return TRUE; + case PIPE_FORMAT_R16G16_SNORM: return TRUE; + case PIPE_FORMAT_R16_SNORM: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_USCALED: return TRUE; + case PIPE_FORMAT_R8G8B8_USCALED: return TRUE; + case PIPE_FORMAT_R8G8_USCALED: return TRUE; + case PIPE_FORMAT_R8_USCALED: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_SSCALED: return TRUE; + case PIPE_FORMAT_R8G8B8_SSCALED: return TRUE; + case PIPE_FORMAT_R8G8_SSCALED: return TRUE; + case PIPE_FORMAT_R8_SSCALED: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_UNORM: return TRUE; + case PIPE_FORMAT_R8G8B8_UNORM: return TRUE; + case PIPE_FORMAT_R8G8_UNORM: return TRUE; + case PIPE_FORMAT_R8_UNORM: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_SNORM: return TRUE; + case PIPE_FORMAT_R8G8B8_SNORM: return TRUE; + case PIPE_FORMAT_R8G8_SNORM: return TRUE; + case PIPE_FORMAT_R8_SNORM: return TRUE; + + case PIPE_FORMAT_A8R8G8B8_UNORM: return TRUE; + case PIPE_FORMAT_B8G8R8A8_UNORM: return TRUE; + default: return FALSE; + } +} -- cgit v1.2.3 From 41c7ff11e6aa4d82d1175446aea0984bf28e3905 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 22:11:19 +0200 Subject: u_staging: remove useless inline keyword --- src/gallium/auxiliary/util/u_staging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c index 4dff8a0b5a..607c31f5ee 100644 --- a/src/gallium/auxiliary/util/u_staging.c +++ b/src/gallium/auxiliary/util/u_staging.c @@ -22,7 +22,7 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne template->flags = 0; } -INLINE struct util_staging_transfer * +struct util_staging_transfer * util_staging_transfer_new(struct pipe_context *pipe, struct pipe_resource *pt, struct pipe_subresource sr, -- cgit v1.2.3 From de4784e36505316c2a5ab34cc5b371d17f38d3c5 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 12 Aug 2010 05:06:21 +0200 Subject: u_blitter: unify clear_depth_stencil and flush_depth_stencil No need to enable depth test for clear. --- src/gallium/auxiliary/util/u_blitter.c | 46 ---------------------------------- src/gallium/auxiliary/util/u_blitter.h | 2 -- src/gallium/drivers/r300/r300_blit.c | 4 ++- 3 files changed, 3 insertions(+), 49 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index b5b86b7214..1b9e957e3c 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,7 +87,6 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; - void *dsa_flush_depth_stencil; void *velem_state; @@ -158,12 +157,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) pipe->create_depth_stencil_alpha_state(pipe, &dsa); dsa.depth.writemask = 1; - ctx->dsa_flush_depth_stencil = - pipe->create_depth_stencil_alpha_state(pipe, &dsa); - - dsa.depth.enabled = 1; - dsa.depth.writemask = 1; - dsa.depth.func = PIPE_FUNC_ALWAYS; ctx->dsa_write_depth_keep_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -945,42 +938,3 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } - -/* Clear a region of a depth stencil surface. */ -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf) -{ - struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->base.pipe; - struct pipe_framebuffer_state fb_state; - - assert(dstsurf->texture); - if (!dstsurf->texture) - return; - - /* check the saved state */ - blitter_check_saved_CSOs(ctx); - assert(blitter->saved_fb_state.nr_cbufs != ~0); - - /* bind CSOs */ - pipe->bind_blend_state(pipe, ctx->blend_keep_color); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); - - pipe->bind_rasterizer_state(pipe, ctx->rs_state); - pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); - pipe->bind_vs_state(pipe, ctx->vs_col); - pipe->bind_vertex_elements_state(pipe, ctx->velem_state); - - /* set a framebuffer state */ - fb_state.width = dstsurf->width; - fb_state.height = dstsurf->height; - fb_state.nr_cbufs = 0; - fb_state.cbufs[0] = 0; - fb_state.zsbuf = dstsurf; - pipe->set_framebuffer_state(pipe, &fb_state); - - blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); - blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, - UTIL_BLITTER_ATTRIB_NONE, NULL); - blitter_restore_CSOs(ctx); -} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index f316587dea..ba3f92eca8 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,8 +200,6 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ff52286b5c..67e8288440 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -279,7 +279,9 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, PIPE_BIND_DEPTH_STENCIL); r300->z_decomp_rd = TRUE; r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + util_blitter_clear_depth_stencil(r300->blitter, dstsurf, + PIPE_CLEAR_DEPTH, 0, 0, + 0, 0, dstsurf->width, dstsurf->height); r300_blitter_end(r300); r300->z_decomp_rd = FALSE; -- cgit v1.2.3 From f668ea11bd0b1f662e0be523a4bc46835e011ffa Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 12 Aug 2010 13:34:53 +0200 Subject: Revert "u_blitter: unify clear_depth_stencil and flush_depth_stencil" This reverts commit de4784e36505316c2a5ab34cc5b371d17f38d3c5. --- src/gallium/auxiliary/util/u_blitter.c | 46 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_blitter.h | 2 ++ src/gallium/drivers/r300/r300_blit.c | 4 +-- 3 files changed, 49 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 1b9e957e3c..b5b86b7214 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,6 +87,7 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; + void *dsa_flush_depth_stencil; void *velem_state; @@ -157,6 +158,12 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) pipe->create_depth_stencil_alpha_state(pipe, &dsa); dsa.depth.writemask = 1; + ctx->dsa_flush_depth_stencil = + pipe->create_depth_stencil_alpha_state(pipe, &dsa); + + dsa.depth.enabled = 1; + dsa.depth.writemask = 1; + dsa.depth.func = PIPE_FUNC_ALWAYS; ctx->dsa_write_depth_keep_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -938,3 +945,42 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } + +/* Clear a region of a depth stencil surface. */ +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->base.pipe; + struct pipe_framebuffer_state fb_state; + + assert(dstsurf->texture); + if (!dstsurf->texture) + return; + + /* check the saved state */ + blitter_check_saved_CSOs(ctx); + assert(blitter->saved_fb_state.nr_cbufs != ~0); + + /* bind CSOs */ + pipe->bind_blend_state(pipe, ctx->blend_keep_color); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); + + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); + pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); + + /* set a framebuffer state */ + fb_state.width = dstsurf->width; + fb_state.height = dstsurf->height; + fb_state.nr_cbufs = 0; + fb_state.cbufs[0] = 0; + fb_state.zsbuf = dstsurf; + pipe->set_framebuffer_state(pipe, &fb_state); + + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, + UTIL_BLITTER_ATTRIB_NONE, NULL); + blitter_restore_CSOs(ctx); +} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index ba3f92eca8..f316587dea 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,6 +200,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 67e8288440..ff52286b5c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -279,9 +279,7 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, PIPE_BIND_DEPTH_STENCIL); r300->z_decomp_rd = TRUE; r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_clear_depth_stencil(r300->blitter, dstsurf, - PIPE_CLEAR_DEPTH, 0, 0, - 0, 0, dstsurf->width, dstsurf->height); + util_blitter_flush_depth_stencil(r300->blitter, dstsurf); r300_blitter_end(r300); r300->z_decomp_rd = FALSE; -- cgit v1.2.3 From eacb624a4a11867427955c812e64c00d5c82bcdd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 14 Aug 2010 18:02:47 +0100 Subject: gallivm: Refactor the Newton-Rapshon steps, and disable once again. It causes a very ugly corruption on the Earth's halo on Google Earth. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 111 +++++++++++++++++++++------- 1 file changed, 83 insertions(+), 28 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 816ee70119..7b35dd4bb4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -59,6 +59,19 @@ #include "lp_bld_arit.h" +/* + * XXX: Increasing eliminates some artifacts, but adds others, most + * noticeably corruption in the Earth halo in Google Earth. + */ +#define RCP_NEWTON_STEPS 0 + +#define RSQRT_NEWTON_STEPS 0 + +#define EXP_POLY_DEGREE 3 + +#define LOG_POLY_DEGREE 5 + + /** * Generate min(a, b) * No checks for special case values of a or b = 1 or 0 are done. @@ -1248,6 +1261,31 @@ lp_build_sqrt(struct lp_build_context *bld, } +/** + * Do one Newton-Raphson step to improve reciprocate precision: + * + * x_{i+1} = x_i * (2 - a * x_i) + * + * See also: + * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division + * - http://softwarecommunity.intel.com/articles/eng/1818.htm + */ +static INLINE LLVMValueRef +lp_build_rcp_refine(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef rcp_a) +{ + LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); + LLVMValueRef res; + + res = LLVMBuildFMul(bld->builder, a, rcp_a, ""); + res = LLVMBuildFSub(bld->builder, two, res, ""); + res = LLVMBuildFMul(bld->builder, rcp_a, res, ""); + + return res; +} + + LLVMValueRef lp_build_rcp(struct lp_build_context *bld, LLVMValueRef a) @@ -1269,38 +1307,49 @@ lp_build_rcp(struct lp_build_context *bld, return LLVMConstFDiv(bld->one, a); if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { - /* - * XXX: Added precision is not always necessary, so only enable this - * when we have a better system in place to track minimum precision. - */ - -#if 1 - /* - * Do one Newton-Raphson step to improve precision: - * - * x1 = (2 - a * rcp(a)) * rcp(a) - */ - - LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); - LLVMValueRef rcp_a; LLVMValueRef res; + unsigned i; - rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); + res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a); - res = LLVMBuildFMul(bld->builder, a, rcp_a, ""); - res = LLVMBuildFSub(bld->builder, two, res, ""); - res = LLVMBuildFMul(bld->builder, res, rcp_a, ""); + for (i = 0; i < RCP_NEWTON_STEPS; ++i) { + res = lp_build_rcp_refine(bld, a, res); + } return res; -#else - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); -#endif } return LLVMBuildFDiv(bld->builder, bld->one, a, ""); } +/** + * Do one Newton-Raphson step to improve rsqrt precision: + * + * x_{i+1} = 0.5 * x_i * (3.0 - a * x_i * x_i) + * + * See also: + * - http://softwarecommunity.intel.com/articles/eng/1818.htm + */ +static INLINE LLVMValueRef +lp_build_rsqrt_refine(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef rsqrt_a) +{ + LLVMValueRef half = lp_build_const_vec(bld->type, 0.5); + LLVMValueRef three = lp_build_const_vec(bld->type, 3.0); + LLVMValueRef res; + + res = LLVMBuildFMul(bld->builder, rsqrt_a, rsqrt_a, ""); + res = LLVMBuildFMul(bld->builder, a, res, ""); + res = LLVMBuildFSub(bld->builder, three, res, ""); + res = LLVMBuildFMul(bld->builder, rsqrt_a, res, ""); + res = LLVMBuildFMul(bld->builder, half, res, ""); + + return res; +} + + /** * Generate 1/sqrt(a) */ @@ -1314,8 +1363,18 @@ lp_build_rsqrt(struct lp_build_context *bld, assert(type.floating); - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a); + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + LLVMValueRef res; + unsigned i; + + res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a); + + for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) { + res = lp_build_rsqrt_refine(bld, a, res); + } + + return res; + } return lp_build_rcp(bld, lp_build_sqrt(bld, a)); } @@ -1821,10 +1880,6 @@ lp_build_log(struct lp_build_context *bld, } -#define EXP_POLY_DEGREE 3 -#define LOG_POLY_DEGREE 5 - - /** * Generate polynomial. * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2]. -- cgit v1.2.3 From 923256626931c057d1a7c20d8900768b0c1faea9 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 13 Aug 2010 15:26:29 +0200 Subject: u_cpu_detect: remove arch and little_endian This logic duplicates the one in p_config.h, so remove it and adjust the only two places that were using it. --- src/gallium/auxiliary/gallivm/lp_bld_pack.c | 7 +++---- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 6 +++++- src/gallium/auxiliary/util/u_cpu_detect.c | 18 ------------------ src/gallium/auxiliary/util/u_cpu_detect.h | 13 +------------ 4 files changed, 9 insertions(+), 35 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index ecfb13a0d4..b7b630f2e8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -171,14 +171,13 @@ lp_build_unpack2(LLVMBuilderRef builder, msb = lp_build_zero(src_type); /* Interleave bits */ - if(util_cpu_caps.little_endian) { +#ifdef PIPE_ARCH_LITTLE_ENDIAN *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0); *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1); - } - else { +#else *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0); *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1); - } +#endif /* Cast the result into the new type (twice as wide) */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 307506507d..02d43e373a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1840,7 +1840,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, unsigned i, j; for(j = 0; j < h16.type.length; j += 4) { - unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; +#ifdef PIPE_ARCH_LITTLE_ENDIAN + unsigned subindex = 0; +#else + unsigned subindex = 1; +#endif LLVMValueRef index; index = LLVMConstInt(elem_type, j/2 + subindex, 0); diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index b1a8c75b99..2bbc554a90 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -391,23 +391,6 @@ util_cpu_detect(void) memset(&util_cpu_caps, 0, sizeof util_cpu_caps); - /* Check for arch type */ -#if defined(PIPE_ARCH_MIPS) - util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS; -#elif defined(PIPE_ARCH_ALPHA) - util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA; -#elif defined(PIPE_ARCH_SPARC) - util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC; -#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - util_cpu_caps.arch = UTIL_CPU_ARCH_X86; - util_cpu_caps.little_endian = 1; -#elif defined(PIPE_ARCH_PPC) - util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC; - util_cpu_caps.little_endian = 0; -#else - util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN; -#endif - /* Count the number of CPUs in system */ #if defined(PIPE_OS_WINDOWS) { @@ -504,7 +487,6 @@ util_cpu_detect(void) #ifdef DEBUG if (debug_get_option_dump_cpu()) { - debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch); debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h index 4b3dc39c34..f3bef0993c 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.h +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -36,26 +36,15 @@ #define _UTIL_CPU_DETECT_H #include "pipe/p_compiler.h" - -enum util_cpu_arch { - UTIL_CPU_ARCH_UNKNOWN = 0, - UTIL_CPU_ARCH_MIPS, - UTIL_CPU_ARCH_ALPHA, - UTIL_CPU_ARCH_SPARC, - UTIL_CPU_ARCH_X86, - UTIL_CPU_ARCH_POWERPC -}; +#include "pipe/p_config.h" struct util_cpu_caps { - enum util_cpu_arch arch; unsigned nr_cpus; /* Feature flags */ int x86_cpu_type; unsigned cacheline; - unsigned little_endian:1; - unsigned has_tsc:1; unsigned has_mmx:1; unsigned has_mmx2:1; -- cgit v1.2.3 From 10d77f3f6b86eeb3ec1d9736c02335831e5c73c2 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 14 Aug 2010 12:54:21 -0700 Subject: gallivm: Remove unnecessary header. --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 02d43e373a..806c7d56a8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -40,7 +40,6 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" -#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" -- cgit v1.2.3 From 7f5202be63c6dc639e57d11ef8253e79dd349f59 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 15 Aug 2010 01:00:42 +0100 Subject: gallium: Make printing info on debug builds default off This commit silences the printing off most of the debug information when running debug builds. The big culprits are: the tgsi sanity checker that gets run on all shaders on debug; all the options; and finaly the cpu caps printer. --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_sanity.h | 2 +- src/gallium/auxiliary/util/u_cpu_detect.c | 2 +- src/gallium/auxiliary/util/u_debug.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 287ee006cf..acbff103ef 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -34,7 +34,7 @@ #include "tgsi_iterate.h" -DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE) +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE) typedef struct { diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h index 46d8d18419..73f0f414e3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -36,7 +36,7 @@ extern "C" { /* Check the given token stream for errors and common mistakes. * Diagnostic messages are printed out to the debug output, and is - * controlled by the debug option TGSI_PRINT_SANITY (default true). + * controlled by the debug option TGSI_PRINT_SANITY (default false). * Returns TRUE if there are no errors, even though there could be some warnings. */ boolean diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 2bbc554a90..5056351307 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -73,7 +73,7 @@ #endif -DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE) +DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE) struct util_cpu_caps util_cpu_caps; diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index ad162558bc..504e6d2a18 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -88,7 +88,7 @@ debug_get_option_should_print(void) * but its cool since we set first to false */ first = FALSE; - value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE); + value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", FALSE); /* XXX should we print this option? Currently it wont */ return value; } -- cgit v1.2.3