diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
59 files changed, 5419 insertions, 1665 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 41ac1cee72..4a3fc036c4 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -6,7 +6,12 @@ LIBNAME = llvmpipe DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ - lp_buffer.c \ + lp_bld_alpha.c \ + lp_bld_blend_aos.c \ + lp_bld_blend_logicop.c \ + lp_bld_blend_soa.c \ + lp_bld_depth.c \ + lp_bld_interp.c \ lp_clear.c \ lp_context.c \ lp_draw_arrays.c \ @@ -42,20 +47,19 @@ C_SOURCES = \ CPP_SOURCES = \ +PROGS := lp_test_format \ + lp_test_blend \ + lp_test_conv \ + lp_test_printf + include ../../Makefile.template -lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv +lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ -testprogs := lp_test_format \ - lp_test_blend \ - lp_test_conv - -LIBS += $(GL_LIB_DEPS) -L. 
-lllvmpipe -L../../auxiliary/ -lgallium +LIBS += $(GL_LIB_DEPS) -L../../auxiliary/ -lgallium libllvmpipe.a -$(testprogs): lp_test_% : lp_test_%.o lp_test_main.o libllvmpipe.a - $(LD) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group +$(PROGS): lp_test_main.o libllvmpipe.a -default: $(testprogs) diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index bf4c9a5727..3c3fd386b5 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -12,7 +12,11 @@ Done so far is: - depth testing - - texture sampling (not all state/formats are supported) + - texture sampling + - 1D/2D/3D/cube maps supported + - all texture wrap modes supported + - all texture filtering modes supported + - perhaps not all texture formats yet supported - fragment shader TGSI translation - same level of support as the TGSI SSE2 exec machine, with the exception @@ -37,8 +41,6 @@ To do (probably by this order): - code generate stipple and stencil testing - - translate the remaining bits of texture sampling state - - translate TGSI control flow instructions, and all other remaining opcodes - integrate with the draw module for VS code generation @@ -57,7 +59,7 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.6. + - LLVM 2.6 (or later) For Linux, on a recent Debian based distribution do: @@ -67,6 +69,9 @@ Requirements http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment variable to the extracted path. + The version of LLVM from SVN ("2.7svn") from mid-March 2010 seems pretty + stable and has some features not in version 2.6. + - scons (optional) - udis86, http://udis86.sourceforge.net/ (optional): @@ -140,11 +145,13 @@ Development Notes then skim through the lp_bld_* functions called in there, and the comments at the top of the lp_bld_*.c functions. 
-- All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be - put in a stand-alone Gallium state -> LLVM IR translation module. +- The driver-independent parts of the LLVM / Gallium code are found in + src/gallium/auxiliary/gallivm/. The filenames and function prefixes + need to be renamed from "lp_bld_" to something else though. - We use LLVM-C bindings for now. They are not documented, but follow the C++ interfaces very closely, and appear to be complete enough for code generation. See http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html for a stand-alone example. + See the llvm-c/Core.h file for reference. diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 13c1a13e87..b9e9826e2a 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -1,12 +1,11 @@ Import('*') -env = env.Clone() - -env.Tool('llvm') -if not env.has_key('LLVM_VERSION'): - print 'warning: LLVM not found: not building llvmpipe' +if not env['llvm']: + print 'warning: LLVM disabled: not building llvmpipe' Return() +env = env.Clone() + env.Tool('udis86') env.Append(CPPPATH = ['.']) @@ -22,13 +21,17 @@ env.CodeGenerate( env.Depends('lp_tile_soa.c', [ '#src/gallium/auxiliary/util/u_format_parse.py', '#src/gallium/auxiliary/util/u_format_pack.py', - '#src/gallium/auxiliary/util/u_format_access.py', ]) llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ - 'lp_buffer.c', + 'lp_bld_alpha.c', + 'lp_bld_blend_aos.c', + 'lp_bld_blend_logicop.c', + 'lp_bld_blend_soa.c', + 'lp_bld_depth.c', + 'lp_bld_interp.c', 'lp_clear.c', 'lp_context.c', 'lp_draw_arrays.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c new file mode 100644 index 0000000000..8514030cde --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -0,0 +1,64 @@ 
+/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Alpha testing to LLVM IR translation. 
+ * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_state.h" + +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" + +#include "lp_bld_alpha.h" + + +void +lp_build_alpha_test(LLVMBuilderRef builder, + const struct pipe_alpha_state *state, + struct lp_type type, + struct lp_build_mask_context *mask, + LLVMValueRef alpha, + LLVMValueRef ref) +{ + struct lp_build_context bld; + + lp_build_context_init(&bld, builder, type); + + if(state->enabled) { + LLVMValueRef test = lp_build_cmp(&bld, state->func, alpha, ref); + + lp_build_name(test, "alpha_mask"); + + lp_build_mask_update(mask, test); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_buffer.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index d6b8184a0b..0f99fec65e 100644 --- a/src/gallium/drivers/llvmpipe/lp_buffer.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -25,31 +25,30 @@ * **************************************************************************/ -#ifndef LP_BUFFER_H -#define LP_BUFFER_H - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" +/** + * Alpha testing to LLVM IR translation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ +#ifndef LP_BLD_ALPHA_H +#define LP_BLD_ALPHA_H -struct llvmpipe_buffer -{ - struct pipe_buffer base; - boolean userBuffer; /** Is this a user-space buffer? 
*/ - void *data; -}; +#include "gallivm/lp_bld.h" -/** Cast wrapper */ -static INLINE struct llvmpipe_buffer * -llvmpipe_buffer( struct pipe_buffer *buf ) -{ - return (struct llvmpipe_buffer *)buf; -} +struct pipe_alpha_state; +struct lp_type; +struct lp_build_mask_context; void -llvmpipe_init_screen_buffer_funcs(struct pipe_screen *screen); +lp_build_alpha_test(LLVMBuilderRef builder, + const struct pipe_alpha_state *state, + struct lp_type type, + struct lp_build_mask_context *mask, + LLVMValueRef alpha, + LLVMValueRef ref); -#endif /* LP_BUFFER_H */ +#endif /* !LP_BLD_ALPHA_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h new file mode 100644 index 0000000000..ebbdb1a604 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_BLEND_H +#define LP_BLD_BLEND_H + + +/** + * @file + * LLVM IR building helpers interfaces. + * + * We use LLVM-C bindings for now. They are not documented, but follow the C++ + * interfaces very closely, and appear to be complete enough for code + * generation. See + * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html + * for a standalone example. + */ + +#include "gallivm/lp_bld.h" + +#include "pipe/p_format.h" + + +struct pipe_blend_state; +struct lp_type; +struct lp_build_context; + + +/** + * Whether the blending function is commutative or not. + */ +boolean +lp_build_blend_func_commutative(unsigned func); + + +/** + * Whether the blending functions are the reverse of each other. + */ +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func); + + +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2); + + +LLVMValueRef +lp_build_blend_aos(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + struct lp_type type, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef const_, + unsigned alpha_swizzle); + + +void +lp_build_blend_soa(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + struct lp_type type, + LLVMValueRef src[4], + LLVMValueRef dst[4], + LLVMValueRef const_[4], + LLVMValueRef res[4]); + + +/** + * Apply a logic op. + * + * src/dst parameters are packed values. It should work regardless of whether + * the inputs are scalars or vectors.
+ */ +LLVMValueRef +lp_build_logicop(LLVMBuilderRef builder, + unsigned logicop_func, + LLVMValueRef src, + LLVMValueRef dst); + + +#endif /* !LP_BLD_BLEND_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c new file mode 100644 index 0000000000..3fa5e51cac --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -0,0 +1,360 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- AoS layout. + * + * AoS blending is in general much slower than SoA, but there are some cases + * where it might be faster. 
In particular, if a pixel is rendered only once + * then the overhead of tiling and untiling will dominate over the speedup that + * SoA gives. So we might want to detect such cases and fall back to AoS in the + * future, but for now this function is here for historical/benchmarking + * purposes. + * + * Run lp_test_blend after any change to this file. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "pipe/p_state.h" +#include "util/u_debug.h" + +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_debug.h" + +#include "lp_bld_blend.h" + + +/** + * We may use the same values several times, so we keep them here to avoid + * recomputing them. Also reusing the values allows us to do simplifications + * that LLVM optimization passes wouldn't normally be able to do. + */ +struct lp_build_blend_aos_context +{ + struct lp_build_context base; + + LLVMValueRef src; + LLVMValueRef dst; + LLVMValueRef const_; + + LLVMValueRef inv_src; + LLVMValueRef inv_dst; + LLVMValueRef inv_const; + LLVMValueRef saturate; + + LLVMValueRef rgb_src_factor; + LLVMValueRef alpha_src_factor; + LLVMValueRef rgb_dst_factor; + LLVMValueRef alpha_dst_factor; +}; + + +static LLVMValueRef +lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, + unsigned factor, + boolean alpha) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return bld->base.zero; + case PIPE_BLENDFACTOR_ONE: + return bld->base.one; + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_SRC_ALPHA: + return bld->src; + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_DST_ALPHA: + return bld->dst; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + if(alpha) + return bld->base.one; + else { + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + if(!bld->saturate) + bld->saturate = lp_build_min(&bld->base, bld->src, 
bld->inv_dst); + return bld->saturate; + } + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_CONST_ALPHA: + return bld->const_; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + if(!bld->inv_src) + bld->inv_src = lp_build_comp(&bld->base, bld->src); + return bld->inv_src; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + return bld->inv_dst; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + if(!bld->inv_const) + bld->inv_const = lp_build_comp(&bld->base, bld->const_); + return bld->inv_const; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + default: + assert(0); + return bld->base.zero; + } +} + + +enum lp_build_blend_swizzle { + LP_BUILD_BLEND_SWIZZLE_RGBA = 0, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1 +}; + + +/** + * How should we shuffle the base factor. 
+ */ +static enum lp_build_blend_swizzle +lp_build_blend_factor_swizzle(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + case PIPE_BLENDFACTOR_ZERO: + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return LP_BUILD_BLEND_SWIZZLE_RGBA; + case PIPE_BLENDFACTOR_SRC_ALPHA: + case PIPE_BLENDFACTOR_DST_ALPHA: + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return LP_BUILD_BLEND_SWIZZLE_AAAA; + default: + assert(0); + return LP_BUILD_BLEND_SWIZZLE_RGBA; + } +} + + +static LLVMValueRef +lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, + LLVMValueRef rgb, + LLVMValueRef alpha, + enum lp_build_blend_swizzle rgb_swizzle, + unsigned alpha_swizzle) +{ + if(rgb == alpha) { + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) + return rgb; + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) + return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); + } + else { + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) { + boolean cond[4] = {0, 0, 0, 0}; + cond[alpha_swizzle] = 1; + return lp_build_select_aos(&bld->base, alpha, rgb, cond); + } + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { + unsigned char swizzle[4]; + swizzle[0] = alpha_swizzle; + swizzle[1] = alpha_swizzle; + swizzle[2] = alpha_swizzle; + swizzle[3] = alpha_swizzle; + swizzle[alpha_swizzle] += 4; + return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle); + } + } + assert(0); + return bld->base.undef; +} + + +/** + * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml + */ +static 
LLVMValueRef +lp_build_blend_factor(struct lp_build_blend_aos_context *bld, + LLVMValueRef factor1, + unsigned rgb_factor, + unsigned alpha_factor, + unsigned alpha_swizzle) +{ + LLVMValueRef rgb_factor_; + LLVMValueRef alpha_factor_; + LLVMValueRef factor2; + enum lp_build_blend_swizzle rgb_swizzle; + + rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); + alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); + + rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); + + factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); + + return lp_build_mul(&bld->base, factor1, factor2); +} + + +boolean +lp_build_blend_func_commutative(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + case PIPE_BLEND_MIN: + case PIPE_BLEND_MAX: + return TRUE; + case PIPE_BLEND_SUBTRACT: + case PIPE_BLEND_REVERSE_SUBTRACT: + return FALSE; + default: + assert(0); + return TRUE; + } +} + + +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) +{ + if(rgb_func == alpha_func) + return FALSE; + if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) + return TRUE; + if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) + return TRUE; + return FALSE; +} + + +/** + * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml + */ +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2) +{ + switch (func) { + case PIPE_BLEND_ADD: + return lp_build_add(bld, term1, term2); + case PIPE_BLEND_SUBTRACT: + return lp_build_sub(bld, term1, term2); + case PIPE_BLEND_REVERSE_SUBTRACT: + return lp_build_sub(bld, term2, term1); + case PIPE_BLEND_MIN: + return lp_build_min(bld, term1, term2); + case PIPE_BLEND_MAX: + return lp_build_max(bld, term1, term2); + default: + assert(0); + return bld->zero; + } +} + + +LLVMValueRef +lp_build_blend_aos(LLVMBuilderRef 
builder, + const struct pipe_blend_state *blend, + struct lp_type type, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef const_, + unsigned alpha_swizzle) +{ + struct lp_build_blend_aos_context bld; + LLVMValueRef src_term; + LLVMValueRef dst_term; + + /* FIXME */ + assert(blend->independent_blend_enable == 0); + assert(blend->rt[0].colormask == 0xf); + + if(!blend->rt[0].blend_enable) + return src; + + /* It makes no sense to blend unless values are normalized */ + assert(type.norm); + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + bld.src = src; + bld.dst = dst; + bld.const_ = const_; + + /* TODO: There are still a few optimization opportunities here. For certain + * combinations it is possible to reorder the operations and therefore saving + * some instructions. */ + + src_term = lp_build_blend_factor(&bld, src, blend->rt[0].rgb_src_factor, + blend->rt[0].alpha_src_factor, alpha_swizzle); + dst_term = lp_build_blend_factor(&bld, dst, blend->rt[0].rgb_dst_factor, + blend->rt[0].alpha_dst_factor, alpha_swizzle); + + lp_build_name(src_term, "src_term"); + lp_build_name(dst_term, "dst_term"); + + if(blend->rt[0].rgb_func == blend->rt[0].alpha_func) { + return lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); + } + else { + /* Seperate RGB / A functions */ + + LLVMValueRef rgb; + LLVMValueRef alpha; + + rgb = lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); + alpha = lp_build_blend_func(&bld.base, blend->rt[0].alpha_func, src_term, dst_term); + + return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c new file mode 100644 index 0000000000..1eac0a5c89 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c @@ -0,0 +1,109 @@ 
+/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- logic ops. 
+ * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "pipe/p_state.h" +#include "util/u_debug.h" + +#include "lp_bld_blend.h" + + +LLVMValueRef +lp_build_logicop(LLVMBuilderRef builder, + unsigned logicop_func, + LLVMValueRef src, + LLVMValueRef dst) +{ + LLVMTypeRef type; + LLVMValueRef res; + + type = LLVMTypeOf(src); + + switch (logicop_func) { + case PIPE_LOGICOP_CLEAR: + res = LLVMConstNull(type); + break; + case PIPE_LOGICOP_NOR: + res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), ""); + break; + case PIPE_LOGICOP_AND_INVERTED: + res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, ""); + break; + case PIPE_LOGICOP_COPY_INVERTED: + res = LLVMBuildNot(builder, src, ""); + break; + case PIPE_LOGICOP_AND_REVERSE: + res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), ""); + break; + case PIPE_LOGICOP_INVERT: + res = LLVMBuildNot(builder, dst, ""); + break; + case PIPE_LOGICOP_XOR: + res = LLVMBuildXor(builder, src, dst, ""); + break; + case PIPE_LOGICOP_NAND: + res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), ""); + break; + case PIPE_LOGICOP_AND: + res = LLVMBuildAnd(builder, src, dst, ""); + break; + case PIPE_LOGICOP_EQUIV: + res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), ""); + break; + case PIPE_LOGICOP_NOOP: + res = dst; + break; + case PIPE_LOGICOP_OR_INVERTED: + res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, ""); + break; + case PIPE_LOGICOP_COPY: + res = src; + break; + case PIPE_LOGICOP_OR_REVERSE: + res = LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), ""); + break; + case PIPE_LOGICOP_OR: + res = LLVMBuildOr(builder, src, dst, ""); + break; + case PIPE_LOGICOP_SET: + res = LLVMConstAllOnes(type); + break; + default: + assert(0); + res = src; + } + + return res; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c new file mode 100644 index 0000000000..b7523eb9c1 --- /dev/null 
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -0,0 +1,315 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- SoA layout. + * + * Blending in SoA is much faster than AoS, especially when separate rgb/alpha + * factors/functions are used, since no channel masking/shuffling is necessary + * and we can achieve the full throughput of the SIMD operations. Furthermore + * the fragment shader output is also in SoA, so it fits nicely with the rest of + * the fragment pipeline. + * + * The drawback is that to be displayed the color buffer needs to be in AoS + * layout, so we need to tile/untile the color buffer before/after rendering. 
+ * A color buffer like + * + * R11 G11 B11 A11 R12 G12 B12 A12 R13 G13 B13 A13 R14 G14 B14 A14 ... + * R21 G21 B21 A21 R22 G22 B22 A22 R23 G23 B23 A23 R24 G24 B24 A24 ... + * + * R31 G31 B31 A31 R32 G32 B32 A32 R33 G33 B33 A33 R34 G34 B34 A34 ... + * R41 G41 B41 A41 R42 G42 B42 A42 R43 G43 B43 A43 R44 G44 B44 A44 ... + * + * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... + * + * will actually be stored in memory as + * + * R11 R12 R21 R22 R13 R14 R23 R24 ... G11 G12 G21 G22 G13 G14 G23 G24 ... B11 B12 B21 B22 B13 B14 B23 B24 ... A11 A12 A21 A22 A13 A14 A23 A24 ... + * R31 R32 R41 R42 R33 R34 R43 R44 ... G31 G32 G41 G42 G33 G34 G43 G44 ... B31 B32 B41 B42 B33 B34 B43 B44 ... A31 A32 A41 A42 A33 A34 A43 A44 ... + * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... + * + * NOTE: Run lp_test_blend after any change to this file. + * + * You can also run lp_test_blend to obtain AoS vs SoA benchmarks. Invoking it + * as: + * + * lp_test_blend -o blend.tsv + * + * will generate a tab-separated file with the test results and performance + * measurements. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "pipe/p_state.h" +#include "util/u_debug.h" + +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_arit.h" +#include "lp_bld_blend.h" + + +/** + * We may use the same values several times, so we keep them here to avoid + * recomputing them. Also reusing the values allows us to do simplifications + * that LLVM optimization passes wouldn't normally be able to do. + */ +struct lp_build_blend_soa_context +{ + struct lp_build_context base; + + LLVMValueRef src[4]; + LLVMValueRef dst[4]; + LLVMValueRef con[4]; + + LLVMValueRef inv_src[4]; + LLVMValueRef inv_dst[4]; + LLVMValueRef inv_con[4]; + + LLVMValueRef src_alpha_saturate; + + /** + * We store all factors in a table in order to eliminate redundant + * multiplications later. 
+ */ + LLVMValueRef factor[2][2][4]; + + /** + * Table with all terms. + */ + LLVMValueRef term[2][4]; +}; + + +static LLVMValueRef +lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, + unsigned factor, unsigned i) +{ + /* + * Compute src/first term RGB + */ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + return bld->base.one; + case PIPE_BLENDFACTOR_SRC_COLOR: + return bld->src[i]; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return bld->src[3]; + case PIPE_BLENDFACTOR_DST_COLOR: + return bld->dst[i]; + case PIPE_BLENDFACTOR_DST_ALPHA: + return bld->dst[3]; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + if(i == 3) + return bld->base.one; + else { + if(!bld->inv_dst[3]) + bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); + if(!bld->src_alpha_saturate) + bld->src_alpha_saturate = lp_build_min(&bld->base, bld->src[3], bld->inv_dst[3]); + return bld->src_alpha_saturate; + } + case PIPE_BLENDFACTOR_CONST_COLOR: + return bld->con[i]; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return bld->con[3]; + case PIPE_BLENDFACTOR_SRC1_COLOR: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_ZERO: + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + if(!bld->inv_src[i]) + bld->inv_src[i] = lp_build_comp(&bld->base, bld->src[i]); + return bld->inv_src[i]; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + if(!bld->inv_src[3]) + bld->inv_src[3] = lp_build_comp(&bld->base, bld->src[3]); + return bld->inv_src[3]; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + if(!bld->inv_dst[i]) + bld->inv_dst[i] = lp_build_comp(&bld->base, bld->dst[i]); + return bld->inv_dst[i]; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + if(!bld->inv_dst[3]) + bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); + return bld->inv_dst[3]; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + if(!bld->inv_con[i]) + bld->inv_con[i] = lp_build_comp(&bld->base, bld->con[i]); + return bld->inv_con[i]; + case 
PIPE_BLENDFACTOR_INV_CONST_ALPHA: + if(!bld->inv_con[3]) + bld->inv_con[3] = lp_build_comp(&bld->base, bld->con[3]); + return bld->inv_con[3]; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + default: + assert(0); + return bld->base.zero; + } +} + + +/** + * Generate blend code in SOA mode. + * \param src src/fragment color + * \param dst dst/framebuffer color + * \param con constant blend color + * \param res the result/output + */ +void +lp_build_blend_soa(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + struct lp_type type, + LLVMValueRef src[4], + LLVMValueRef dst[4], + LLVMValueRef con[4], + LLVMValueRef res[4]) +{ + struct lp_build_blend_soa_context bld; + unsigned i, j, k; + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + for (i = 0; i < 4; ++i) { + bld.src[i] = src[i]; + bld.dst[i] = dst[i]; + bld.con[i] = con[i]; + } + + for (i = 0; i < 4; ++i) { + if (blend->rt[0].colormask & (1 << i)) { + if (blend->logicop_enable) { + if(!type.floating) { + res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]); + } + else + res[i] = dst[i]; + } + else if (blend->rt[0].blend_enable) { + unsigned src_factor = i < 3 ? blend->rt[0].rgb_src_factor : blend->rt[0].alpha_src_factor; + unsigned dst_factor = i < 3 ? blend->rt[0].rgb_dst_factor : blend->rt[0].alpha_dst_factor; + unsigned func = i < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func; + boolean func_commutative = lp_build_blend_func_commutative(func); + + /* It makes no sense to blend unless values are normalized */ + assert(type.norm); + + /* + * Compute src/dst factors. 
+ */ + + bld.factor[0][0][i] = src[i]; + bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i); + bld.factor[1][0][i] = dst[i]; + bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i); + + /* + * Compute src/dst terms + */ + + for(k = 0; k < 2; ++k) { + /* See if this multiplication has been previously computed */ + for(j = 0; j < i; ++j) { + if((bld.factor[k][0][j] == bld.factor[k][0][i] && + bld.factor[k][1][j] == bld.factor[k][1][i]) || + (bld.factor[k][0][j] == bld.factor[k][1][i] && + bld.factor[k][1][j] == bld.factor[k][0][i])) + break; + } + + if(j < i) + bld.term[k][i] = bld.term[k][j]; + else + bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]); + + if (src_factor == PIPE_BLENDFACTOR_ZERO && + (dst_factor == PIPE_BLENDFACTOR_DST_ALPHA || + dst_factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)) { + /* XXX special case these combos to work around an apparent + * bug in LLVM. + * This hack disables the check for multiplication by zero + * in lp_bld_mul(). When we optimize away the multiplication, + * something goes wrong during code generation and we segfault + * at runtime. + */ + LLVMValueRef zeroSave = bld.base.zero; + bld.base.zero = NULL; + bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], + bld.factor[k][1][i]); + bld.base.zero = zeroSave; + } + } + + /* + * Combine terms + */ + + /* See if this function has been previously applied */ + for(j = 0; j < i; ++j) { + unsigned prev_func = j < 3 ? 
blend->rt[0].rgb_func : blend->rt[0].alpha_func; + unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func); + + if((!func_reverse && + bld.term[0][j] == bld.term[0][i] && + bld.term[1][j] == bld.term[1][i]) || + ((func_commutative || func_reverse) && + bld.term[0][j] == bld.term[1][i] && + bld.term[1][j] == bld.term[0][i])) + break; + } + + if(j < i) + res[i] = res[j]; + else + res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]); + } + else { + res[i] = src[i]; + } + } + else { + res[i] = dst[i]; + } + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c new file mode 100644 index 0000000000..1b59a13c94 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -0,0 +1,685 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Depth/stencil testing to LLVM IR translation. + * + * To be done accurately/efficiently the depth/stencil test must be done with + * the same type/format of the depth/stencil buffer, which implies massaging + * the incoming depths to fit into place. Using a more straightforward + * type/format for depth/stencil values internally and only converting when + * flushing would avoid this, but it would most likely result in depth fighting + * artifacts. + * + * We are free to use a different pixel layout though. Since our basic + * processing unit is a quad (2x2 pixel block) we store the depth/stencil + * values tiled, a quad at a time. That is, a depth buffer containing + * + * Z11 Z12 Z13 Z14 ... + * Z21 Z22 Z23 Z24 ... + * Z31 Z32 Z33 Z34 ... + * Z41 Z42 Z43 Z44 ... + * ... ... ... ... ... + * + * will actually be stored in memory as + * + * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ... + * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... + * ... ... ... ... ... ... ... ... ... + * + * + * Stencil test: + * Two-sided stencil test is supported but probably not as efficient as + * it could be. Currently, we use if/then/else constructs to do the + * operations for front vs. back-facing polygons. We could probably do + * both the front and back arithmetic then use a Select() instruction to + * choose the result depending on polygon orientation. We'd have to + * measure performance both ways and see which is better.
+ * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_state.h" +#include "util/u_format.h" + +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_swizzle.h" + +#include "lp_bld_depth.h" + + +/** Used to select fields from pipe_stencil_state */ +enum stencil_op { + S_FAIL_OP, + Z_FAIL_OP, + Z_PASS_OP +}; + + + +/** + * Do the stencil test comparison (compare FB stencil values against ref value). + * This will be used twice when generating two-sided stencil code. + * \param stencil the front/back stencil state + * \param stencilRef the stencil reference value, replicated as a vector + * \param stencilVals vector of stencil values from framebuffer + * \return vector mask of pass/fail values (~0 or 0) + */ +static LLVMValueRef +lp_build_stencil_test_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals) +{ + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + + assert(type.sign); + + assert(stencil->enabled); + + if (stencil->valuemask != stencilMax) { + /* compute stencilRef = stencilRef & valuemask */ + LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask); + stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, ""); + /* compute stencilVals = stencilVals & valuemask */ + stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, ""); + } + + res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); + + return res; +} + + +/** + * Do the one or two-sided stencil test comparison. + * \sa lp_build_stencil_test_single + * \param face an integer indicating front (+) or back (-) facing polygon. + * If NULL, assume front-facing. 
+ */
+static LLVMValueRef
+lp_build_stencil_test(struct lp_build_context *bld,
+                      const struct pipe_stencil_state stencil[2],
+                      LLVMValueRef stencilRefs[2],
+                      LLVMValueRef stencilVals,
+                      LLVMValueRef face)
+{
+   struct lp_build_flow_context *flow;
+   struct lp_build_if_state branch;
+   LLVMValueRef fzero;
+   LLVMValueRef is_front;
+   LLVMValueRef pass_mask;
+
+   assert(stencil[0].enabled);
+
+   /* Back-face state disabled, or no facing value: front state only. */
+   if (!stencil[1].enabled || !face) {
+      return lp_build_stencil_test_single(bld, &stencil[0],
+                                          stencilRefs[0], stencilVals);
+   }
+
+   /*
+    * Two-sided test: branch on the facing value and run the comparison
+    * with the matching state.  The result is declared in the flow scope
+    * so that it is carried out of the if/else.
+    */
+   fzero = LLVMConstReal(LLVMFloatType(), 0.0);
+   pass_mask = bld->undef;
+
+   flow = lp_build_flow_create(bld->builder);
+   lp_build_flow_scope_begin(flow);
+
+   lp_build_flow_scope_declare(flow, &pass_mask);
+
+   /* is_front = face > 0.0 */
+   is_front = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, fzero, "");
+
+   lp_build_if(&branch, flow, bld->builder, is_front);
+   {
+      pass_mask = lp_build_stencil_test_single(bld, &stencil[0],
+                                               stencilRefs[0], stencilVals);
+   }
+   lp_build_else(&branch);
+   {
+      pass_mask = lp_build_stencil_test_single(bld, &stencil[1],
+                                               stencilRefs[1], stencilVals);
+   }
+   lp_build_endif(&branch);
+
+   lp_build_flow_scope_end(flow);
+   lp_build_flow_destroy(flow);
+
+   return pass_mask;
+}
+
+
+/**
+ * Apply the stencil operator (add/sub/keep/etc) to the given vector
+ * of stencil values.
+ * \return new stencil values vector + */ +static LLVMValueRef +lp_build_stencil_op_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + enum stencil_op op, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals, + LLVMValueRef mask) + +{ + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + LLVMValueRef max = lp_build_const_int_vec(type, stencilMax); + unsigned stencil_op; + + assert(type.sign); + + switch (op) { + case S_FAIL_OP: + stencil_op = stencil->fail_op; + break; + case Z_FAIL_OP: + stencil_op = stencil->zfail_op; + break; + case Z_PASS_OP: + stencil_op = stencil->zpass_op; + break; + default: + assert(0 && "Invalid stencil_op mode"); + stencil_op = PIPE_STENCIL_OP_KEEP; + } + + switch (stencil_op) { + case PIPE_STENCIL_OP_KEEP: + res = stencilVals; + /* we can return early for this case */ + return res; + case PIPE_STENCIL_OP_ZERO: + res = bld->zero; + break; + case PIPE_STENCIL_OP_REPLACE: + res = stencilRef; + break; + case PIPE_STENCIL_OP_INCR: + res = lp_build_add(bld, stencilVals, bld->one); + res = lp_build_min(bld, res, max); + break; + case PIPE_STENCIL_OP_DECR: + res = lp_build_sub(bld, stencilVals, bld->one); + res = lp_build_max(bld, res, bld->zero); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + res = lp_build_add(bld, stencilVals, bld->one); + res = LLVMBuildAnd(bld->builder, res, max, ""); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + res = lp_build_sub(bld, stencilVals, bld->one); + res = LLVMBuildAnd(bld->builder, res, max, ""); + break; + case PIPE_STENCIL_OP_INVERT: + res = LLVMBuildNot(bld->builder, stencilVals, ""); + res = LLVMBuildAnd(bld->builder, res, max, ""); + break; + default: + assert(0 && "bad stencil op mode"); + res = NULL; + } + + if (stencil->writemask != stencilMax) { + /* compute res = (res & mask) | (stencilVals & ~mask) */ + LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask); + LLVMValueRef cmask = 
LLVMBuildNot(bld->builder, mask, "notWritemask"); + LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1"); + LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2"); + res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2"); + } + + /* only the update the vector elements enabled by 'mask' */ + res = lp_build_select(bld, mask, res, stencilVals); + + return res; +} + + +/** + * Do the one or two-sided stencil test op/update. + */ +static LLVMValueRef +lp_build_stencil_op(struct lp_build_context *bld, + const struct pipe_stencil_state stencil[2], + enum stencil_op op, + LLVMValueRef stencilRefs[2], + LLVMValueRef stencilVals, + LLVMValueRef mask, + LLVMValueRef face) + +{ + assert(stencil[0].enabled); + + if (stencil[1].enabled && face) { + /* do two-sided op */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef front_facing; + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef result = bld->undef; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + lp_build_flow_scope_declare(flow_ctx, &result); + + /* front_facing = face > 0.0 */ + front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); + { + result = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals, mask); + } + lp_build_else(&if_ctx); + { + result = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals, mask); + } + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + + return result; + } + else { + /* do single-sided op */ + return lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals, mask); + } +} + + + +/** + * Return a type appropriate for depth/stencil testing. 
+ */
+struct lp_type
+lp_depth_type(const struct util_format_description *format_desc,
+              unsigned length)
+{
+   struct lp_type type;
+   unsigned swizzle;
+   unsigned z_chan_type;
+
+   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+
+   /* swizzle[0] names the channel that holds Z */
+   swizzle = format_desc->swizzle[0];
+   assert(swizzle < 4);
+
+   /* start from an all-zero type; the element is as wide as one block */
+   memset(&type, 0, sizeof type);
+   type.width = format_desc->block.bits;
+
+   z_chan_type = format_desc->channel[swizzle].type;
+   switch (z_chan_type) {
+   case UTIL_FORMAT_TYPE_FLOAT:
+      type.floating = TRUE;
+      /* a float Z channel must be channel 0 and fill the whole block */
+      assert(swizzle == 0);
+      assert(format_desc->channel[swizzle].size == format_desc->block.bits);
+      break;
+   case UTIL_FORMAT_TYPE_UNSIGNED:
+      assert(format_desc->block.bits <= 32);
+      if (format_desc->channel[swizzle].normalized)
+         type.norm = TRUE;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* 'length' is divided by the element width to get the element count */
+   assert(type.width <= length);
+   type.length = length / type.width;
+
+   return type;
+}
+
+
+/**
+ * Compute bitmask and bit shift to apply to the incoming fragment Z values
+ * and the Z buffer values needed before doing the Z comparison.
+ *
+ * Note that we leave the Z bits in the position that we find them
+ * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
+ * get by with fewer bit twiddling steps.
+ */ +static boolean +get_z_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *mask) +{ + const unsigned total_bits = format_desc->block.bits; + unsigned z_swizzle; + unsigned chan; + unsigned padding_left, padding_right; + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + z_swizzle = format_desc->swizzle[0]; + + if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + padding_right = 0; + for (chan = 0; chan < z_swizzle; ++chan) + padding_right += format_desc->channel[chan].size; + + padding_left = + total_bits - (padding_right + format_desc->channel[z_swizzle].size); + + if (padding_left || padding_right) { + unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; + unsigned long long mask_right = (1ULL << (padding_right)) - 1; + *mask = mask_left ^ mask_right; + } + else { + *mask = 0xffffffff; + } + + *shift = padding_left; + + return TRUE; +} + + +/** + * Compute bitmask and bit shift to apply to the framebuffer pixel values + * to put the stencil bits in the least significant position. + * (i.e. 0x000000ff) + */ +static boolean +get_s_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *mask) +{ + unsigned s_swizzle; + unsigned chan, sz; + + s_swizzle = format_desc->swizzle[1]; + + if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + *shift = 0; + for (chan = 0; chan < s_swizzle; chan++) + *shift += format_desc->channel[chan].size; + + sz = format_desc->channel[s_swizzle].size; + *mask = (1U << sz) - 1U; + + return TRUE; +} + + + +/** + * Generate code for performing depth and/or stencil tests. + * We operate on a vector of values (typically a 2x2 quad). 
+ * + * \param builder the LLVM builder used to emit the instructions + * \param depth the depth test state + * \param stencil the front/back stencil state + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param mask the alive/dead pixel mask for the quad (vector) + * \param stencil_refs the front/back stencil ref values (scalar); both + * entries are overwritten with their broadcast vectors when stencil[0] + * is enabled + * \param z_src the incoming depth/stencil values (a 2x2 quad) + * \param zs_dst_ptr pointer to depth/stencil values in framebuffer + * \param face float value indicating front/back facing polygon + * (the stencil helpers treat face > 0.0 as front-facing) + */ +void +lp_build_depth_stencil_test(LLVMBuilderRef builder, + const struct pipe_depth_state *depth, + const struct pipe_stencil_state stencil[2], + struct lp_type type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef stencil_refs[2], + LLVMValueRef z_src, + LLVMValueRef zs_dst_ptr, + LLVMValueRef face) +{ + struct lp_build_context bld; + struct lp_build_context sbld; + struct lp_type s_type; + LLVMValueRef zs_dst, z_dst = NULL; + LLVMValueRef stencil_vals = NULL; + LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; + LLVMValueRef z_pass = NULL, s_pass_mask = NULL; + LLVMValueRef orig_mask = mask->value; + + /* Sanity checking */ + { + const unsigned z_swizzle = format_desc->swizzle[0]; + const unsigned s_swizzle = format_desc->swizzle[1]; + + assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || + s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + + assert(depth->enabled || stencil[0].enabled); + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + if (stencil[0].enabled) { + assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || + format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM); + } + + assert(z_swizzle < 4); + assert(format_desc->block.bits == type.width); + if (type.floating) { + assert(z_swizzle == 0); + assert(format_desc->channel[z_swizzle].type 
== + UTIL_FORMAT_TYPE_FLOAT); + assert(format_desc->channel[z_swizzle].size == + format_desc->block.bits); + } + else { + assert(format_desc->channel[z_swizzle].type == + UTIL_FORMAT_TYPE_UNSIGNED); + assert(format_desc->channel[z_swizzle].normalized); + assert(!type.fixed); + assert(!type.sign); + assert(type.norm); + } + } + + + /* Setup build context for Z vals */ + lp_build_context_init(&bld, builder, type); + + /* Setup build context for stencil vals */ + s_type = lp_type_int_vec(type.width); + lp_build_context_init(&sbld, builder, s_type); + + /* Load current z/stencil value from z/stencil buffer */ + zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); + + lp_build_name(zs_dst, "zsbufval"); + + + /* Compute and apply the Z/stencil bitmasks and shifts. + */ + { + unsigned z_shift, z_mask; + unsigned s_shift, s_mask; + + if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + + if (z_mask != 0xffffffff) { + LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); + z_src = LLVMBuildAnd(builder, z_src, mask, ""); + z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); + z_bitmask = mask; /* used below */ + } + else { + z_dst = zs_dst; + } + + lp_build_name(z_dst, "zsbuf.z"); + } + + if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { + if (s_shift) { + LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); + stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); + stencil_shift = shift; /* used below */ + } + else { + stencil_vals = zs_dst; + } + + if (s_mask != 0xffffffff) { + LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); + stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); + } + + lp_build_name(stencil_vals, "stencil"); + } + } + + + if (stencil[0].enabled) { + /* convert scalar stencil refs into vectors */ + stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); +
stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); + + s_pass_mask = lp_build_stencil_test(&sbld, stencil, + stencil_refs, stencil_vals, face); + + /* apply stencil-fail operator */ + { + LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, + stencil_refs, stencil_vals, + s_fail_mask, face); + } + } + + if (depth->enabled) { + /* compare src Z to dst Z, returning 'pass' mask */ + z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); + + if (!stencil[0].enabled) { + /* We can potentially skip all remaining operations here, but only + * if stencil is disabled because we still need to update the stencil + * buffer values. Don't need to update Z buffer values. + */ + lp_build_mask_update(mask, z_pass); + } + + if (depth->writemask) { + LLVMValueRef zselectmask = mask->value; + + /* mask off bits that failed Z test */ + zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); + + /* mask off bits that failed stencil test */ + if (s_pass_mask) { + zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); + } + + /* if combined Z/stencil format, mask off the stencil bits */ + if (z_bitmask) { + zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); + } + + /* Mix the old and new Z buffer values. + * z_dst[i] = zselectmask[i] ? 
z_src[i] : z_dst[i] + */ + z_dst = lp_build_select(&bld, zselectmask, z_src, z_dst); + } + + if (stencil[0].enabled) { + /* update stencil buffer values according to z pass/fail result */ + LLVMValueRef z_fail_mask, z_pass_mask; + + /* apply Z-fail operator */ + z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass); + stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, + stencil_refs, stencil_vals, + z_fail_mask, face); + + /* apply Z-pass operator */ + z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + z_pass_mask, face); + } + } + else { + /* No depth test: apply Z-pass operator to stencil buffer values which + * passed the stencil test. + */ + s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + s_pass_mask, face); + } + + /* The Z bits are already in the right place but we may need to shift the + * stencil bits before ORing Z with Stencil to make the final pixel value. 
+ */ + if (stencil_vals && stencil_shift) + stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, + stencil_shift, ""); + + /* Finally, merge/store the z/stencil values */ + if ((depth->enabled && depth->writemask) || + (stencil[0].enabled && stencil[0].writemask)) { + + if (z_dst && stencil_vals) + zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); + else if (z_dst) + zs_dst = z_dst; + else + zs_dst = stencil_vals; + + LLVMBuildStore(builder, zs_dst, zs_dst_ptr); + } + + if (s_pass_mask) + lp_build_mask_update(mask, s_pass_mask); + + if (depth->enabled && stencil[0].enabled) + lp_build_mask_update(mask, z_pass); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h new file mode 100644 index 0000000000..27dd46b625 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -0,0 +1,66 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Depth/stencil testing to LLVM IR translation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef LP_BLD_DEPTH_H +#define LP_BLD_DEPTH_H + + +#include "gallivm/lp_bld.h" + + +struct pipe_depth_state; +struct util_format_description; +struct lp_type; +struct lp_build_mask_context; + + +struct lp_type +lp_depth_type(const struct util_format_description *format_desc, + unsigned length); + + +void +lp_build_depth_stencil_test(LLVMBuilderRef builder, + const struct pipe_depth_state *depth, + const struct pipe_stencil_state stencil[2], + struct lp_type type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef stencil_refs[2], + LLVMValueRef zs_src, + LLVMValueRef zs_dst_ptr, + LLVMValueRef facing); + + +#endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c new file mode 100644 index 0000000000..838691e14b --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -0,0 +1,408 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Position and shader input interpolation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_shader_tokens.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "tgsi/tgsi_parse.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_swizzle.h" +#include "lp_bld_interp.h" + + +/* + * The shader JIT function operates on blocks of quads. + * Each block has 2x2 quads and each quad has 2x2 pixels. 
+ * + * We iterate over the quads in order 0, 1, 2, 3: + * + * ################# + * # | # | # + * #---0---#---1---# + * # | # | # + * ################# + * # | # | # + * #---2---#---3---# + * # | # | # + * ################# + * + * Within each quad, we have four pixels which are represented in SOA + * order: + * + * ######### + * # 0 | 1 # + * #---+---# + * # 2 | 3 # + * ######### + * + * So the green channel (for example) of the four pixels is stored in + * a single vector register: {g0, g1, g2, g3}. + */ + + +static void +attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) +{ + if(attrib == 0) + lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix); + else + lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix); +} + + +/** + * Initialize the bld->a0, dadx, dady fields. This involves fetching + * those values from the arrays which are passed into the JIT function. + */ +static void +coeffs_init(struct lp_build_interp_soa_context *bld, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr) +{ + LLVMBuilderRef builder = bld->base.builder; + unsigned attrib; + unsigned chan; + + for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + const unsigned mask = bld->mask[attrib]; + const unsigned interp = bld->interp[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(mask & (1 << chan)) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0); + LLVMValueRef a0 = NULL; + LLVMValueRef dadx = NULL; + LLVMValueRef dady = NULL; + + switch( interp ) { + case TGSI_INTERPOLATE_PERSPECTIVE: + /* fall-through */ + + case TGSI_INTERPOLATE_LINEAR: + dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); + dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); + dadx = lp_build_broadcast_scalar(&bld->base, dadx); + dady = lp_build_broadcast_scalar(&bld->base, dady); + attrib_name(dadx, attrib, chan, ".dadx"); + 
attrib_name(dady, attrib, chan, ".dady"); + /* fall-through */ + + case TGSI_INTERPOLATE_CONSTANT: + a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); + a0 = lp_build_broadcast_scalar(&bld->base, a0); + attrib_name(a0, attrib, chan, ".a0"); + break; + + default: + assert(0); + break; + } + + bld->a0 [attrib][chan] = a0; + bld->dadx[attrib][chan] = dadx; + bld->dady[attrib][chan] = dady; + } + } + } +} + + +/** + * Emit LLVM code to compute the fragment shader input attribute values. + * For example, for a color input, we'll compute red, green, blue and alpha + * values for the four pixels in a quad. + * Recall that we're operating on 4-element vectors so each arithmetic + * operation is operating on the four pixels in a quad. + */ +static void +attribs_init(struct lp_build_interp_soa_context *bld) +{ + LLVMValueRef x = bld->pos[0]; + LLVMValueRef y = bld->pos[1]; + LLVMValueRef oow = NULL; + unsigned attrib; + unsigned chan; + + for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + const unsigned mask = bld->mask[attrib]; + const unsigned interp = bld->interp[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(mask & (1 << chan)) { + LLVMValueRef a0 = bld->a0 [attrib][chan]; + LLVMValueRef dadx = bld->dadx[attrib][chan]; + LLVMValueRef dady = bld->dady[attrib][chan]; + LLVMValueRef res; + + res = a0; + + if (interp != TGSI_INTERPOLATE_CONSTANT) { + /* res = res + x * dadx */ + res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx)); + /* res = res + y * dady */ + res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady)); + } + + /* Keep the value of the attribute before perspective divide + * for faster updates. 
+ */ + bld->attribs_pre[attrib][chan] = res; + + if (interp == TGSI_INTERPOLATE_PERSPECTIVE) { + LLVMValueRef w = bld->pos[3]; + assert(attrib != 0); + if(!oow) + oow = lp_build_rcp(&bld->base, w); + res = lp_build_mul(&bld->base, res, oow); + } + + attrib_name(res, attrib, chan, ""); + + bld->attribs[attrib][chan] = res; + } + } + } +} + + +/** + * Increment the shader input attribute values. + * This is called when we move from one quad to the next. + */ +static void +attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) +{ + LLVMValueRef oow = NULL; + unsigned attrib; + unsigned chan; + + assert(quad_index < 4); + + for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + const unsigned mask = bld->mask[attrib]; + const unsigned interp = bld->interp[attrib]; + + if (interp != TGSI_INTERPOLATE_CONSTANT) { + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(mask & (1 << chan)) { + LLVMValueRef dadx = bld->dadx[attrib][chan]; + LLVMValueRef dady = bld->dady[attrib][chan]; + LLVMValueRef res; + + res = bld->attribs_pre[attrib][chan]; + + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad */ + /* build res = res + dadx + dadx */ + res = lp_build_add(&bld->base, res, dadx); + res = lp_build_add(&bld->base, res, dadx); + } + + if (quad_index == 2 || quad_index == 3) { + /* bottom-left or bottom-right quad */ + /* build res = res + dady + dady */ + res = lp_build_add(&bld->base, res, dady); + res = lp_build_add(&bld->base, res, dady); + } + + if (interp == TGSI_INTERPOLATE_PERSPECTIVE) { + LLVMValueRef w = bld->pos[3]; + assert(attrib != 0); + if(!oow) + oow = lp_build_rcp(&bld->base, w); + res = lp_build_mul(&bld->base, res, oow); + } + + attrib_name(res, attrib, chan, ""); + + bld->attribs[attrib][chan] = res; + } + } + } + } +} + + +/** + * Generate the position vectors. + * + * Parameter x0, y0 are the integer values with the quad upper left coordinates. 
+ */ +static void +pos_init(struct lp_build_interp_soa_context *bld, + LLVMValueRef x0, + LLVMValueRef y0) +{ + lp_build_name(x0, "pos.x"); + lp_build_name(y0, "pos.y"); + + bld->attribs[0][0] = x0; + bld->attribs[0][1] = y0; +} + + +/** + * Update quad position values when moving to the next quad. + */ +static void +pos_update(struct lp_build_interp_soa_context *bld, int quad_index) +{ + LLVMValueRef x = bld->attribs[0][0]; + LLVMValueRef y = bld->attribs[0][1]; + const int xstep = 2, ystep = 2; + + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad in block */ + /* build x += xstep */ + x = lp_build_add(&bld->base, x, + lp_build_const_vec(bld->base.type, xstep)); + } + + if (quad_index == 2) { + /* bottom-left quad in block */ + /* build y += ystep */ + y = lp_build_add(&bld->base, y, + lp_build_const_vec(bld->base.type, ystep)); + /* build x -= xstep */ + x = lp_build_sub(&bld->base, x, + lp_build_const_vec(bld->base.type, xstep)); + } + + lp_build_name(x, "pos.x"); + lp_build_name(y, "pos.y"); + + bld->attribs[0][0] = x; + bld->attribs[0][1] = y; +} + + +/** + * Initialize fragment shader input attribute info. 
+ */ +void +lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, + const struct tgsi_token *tokens, + boolean flatshade, + LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr, + LLVMValueRef x0, + LLVMValueRef y0) +{ + struct tgsi_parse_context parse; + struct tgsi_full_declaration *decl; + + memset(bld, 0, sizeof *bld); + + lp_build_context_init(&bld->base, builder, type); + + /* For convenience */ + bld->pos = bld->attribs[0]; + bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1]; + + /* Position */ + bld->num_attribs = 1; + bld->mask[0] = TGSI_WRITEMASK_ZW; + bld->interp[0] = TGSI_INTERPOLATE_LINEAR; + + /* Inputs */ + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + decl = &parse.FullToken.FullDeclaration; + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned attrib; + + first = decl->Range.First; + last = decl->Range.Last; + mask = decl->Declaration.UsageMask; + + for( attrib = first; attrib <= last; ++attrib ) { + bld->mask[1 + attrib] = mask; + + /* XXX: have mesa set INTERP_CONSTANT in the fragment + * shader. + */ + if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + flatshade) + bld->interp[1 + attrib] = TGSI_INTERPOLATE_CONSTANT; + else + bld->interp[1 + attrib] = decl->Declaration.Interpolate; + } + + bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_PROPERTY: + break; + + default: + assert( 0 ); + } + } + tgsi_parse_free( &parse ); + + coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); + + pos_init(bld, x0, y0); + + attribs_init(bld); +} + + +/** + * Advance the position and inputs to the given quad within the block. 
+ */ +void +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index) +{ + assert(quad_index < 4); + + pos_update(bld, quad_index); + + attribs_update(bld, quad_index); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h new file mode 100644 index 0000000000..99a432957c --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Position and shader input interpolation. + * + * Special attention is given to the interpolation of side by side quads. + * Multiplications are made only for the first quad. 
Interpolation of + * inputs for subsequent quads is done exclusively with additions, and + * perspective divide if necessary. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef LP_BLD_INTERP_H +#define LP_BLD_INTERP_H + + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_type.h" + +#include "tgsi/tgsi_exec.h" + + + +struct tgsi_token; + + +struct lp_build_interp_soa_context +{ + struct lp_build_context base; + + unsigned num_attribs; + unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITEMASK_x */ + unsigned interp[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_INTERPOLATE_x */ + + LLVMValueRef a0 [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + + /* Attribute values before perspective divide */ + LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + + LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + + /* + * Convenience pointers. Callers may access these. + */ + const LLVMValueRef *pos; + const LLVMValueRef (*inputs)[NUM_CHANNELS]; +}; + + +void +lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, + const struct tgsi_token *tokens, + boolean flatshade, + LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr, + LLVMValueRef x0, + LLVMValueRef y0); + +void +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index); + + +#endif /* LP_BLD_INTERP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_buffer.c b/src/gallium/drivers/llvmpipe/lp_buffer.c deleted file mode 100644 index 9eda972081..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_buffer.c +++ /dev/null @@ -1,119 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved.
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_math.h" - -#include "lp_winsys.h" -#include "lp_screen.h" -#include "lp_buffer.h" - - -static void * -llvmpipe_buffer_map(struct pipe_screen *screen, - struct pipe_buffer *buf, - unsigned flags) -{ - struct llvmpipe_buffer *llvmpipe_buf = llvmpipe_buffer(buf); - return llvmpipe_buf->data; -} - - -static void -llvmpipe_buffer_unmap(struct pipe_screen *screen, - struct pipe_buffer *buf) -{ -} - - -static void -llvmpipe_buffer_destroy(struct pipe_buffer *buf) -{ - struct llvmpipe_buffer *sbuf = llvmpipe_buffer(buf); - - if (!sbuf->userBuffer) - align_free(sbuf->data); - - FREE(sbuf); -} - - -static struct pipe_buffer * -llvmpipe_buffer_create(struct pipe_screen *screen, - unsigned alignment, - unsigned usage, - unsigned size) -{ - struct llvmpipe_buffer *buffer = CALLOC_STRUCT(llvmpipe_buffer); - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = screen; - buffer->base.alignment = MAX2(alignment, 16); - buffer->base.usage = usage; - buffer->base.size = size; - - buffer->data = align_malloc(size, alignment); - - return &buffer->base; -} - - -/** - * Create buffer which wraps user-space data. 
- */ -static struct pipe_buffer * -llvmpipe_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes) -{ - struct llvmpipe_buffer *buffer; - - buffer = CALLOC_STRUCT(llvmpipe_buffer); - if(!buffer) - return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = screen; - buffer->base.size = bytes; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; -} - - -void -llvmpipe_init_screen_buffer_funcs(struct pipe_screen *screen) -{ - screen->buffer_create = llvmpipe_buffer_create; - screen->user_buffer_create = llvmpipe_user_buffer_create; - screen->buffer_map = llvmpipe_buffer_map; - screen->buffer_unmap = llvmpipe_buffer_unmap; - screen->buffer_destroy = llvmpipe_buffer_destroy; -} diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 6962a7921b..f7cf06d8d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -45,10 +45,6 @@ #include "lp_query.h" #include "lp_setup.h" - - - - static void llvmpipe_destroy( struct pipe_context *pipe ) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); @@ -68,38 +64,22 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&llvmpipe->texture[i], NULL); + pipe_sampler_view_reference(&llvmpipe->fragment_sampler_views[i], NULL); } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - pipe_texture_reference(&llvmpipe->vertex_textures[i], NULL); + pipe_sampler_view_reference(&llvmpipe->vertex_sampler_views[i], NULL); } for (i = 0; i < Elements(llvmpipe->constants); i++) { if (llvmpipe->constants[i]) { - pipe_buffer_reference(&llvmpipe->constants[i], NULL); + pipe_resource_reference(&llvmpipe->constants[i], NULL); } } align_free( llvmpipe ); } -static unsigned int -llvmpipe_is_texture_referenced( struct pipe_context *pipe, - 
struct pipe_texture *texture, - unsigned face, unsigned level) -{ - struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); - - return lp_setup_is_texture_referenced(llvmpipe->setup, texture); -} - -static unsigned int -llvmpipe_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - return PIPE_UNREFERENCED; -} struct pipe_context * llvmpipe_create_context( struct pipe_screen *screen, void *priv ) @@ -145,6 +125,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state; llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state; + llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state; + llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state; + llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; + llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color; llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref; llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state; @@ -152,12 +136,13 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures; - llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures; + llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views; + llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views; + llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; + llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; - llvmpipe->pipe.set_vertex_elements = 
llvmpipe_set_vertex_elements; llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; @@ -166,16 +151,15 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.clear = llvmpipe_clear; llvmpipe->pipe.flush = llvmpipe_flush; - llvmpipe->pipe.is_texture_referenced = llvmpipe_is_texture_referenced; - llvmpipe->pipe.is_buffer_referenced = llvmpipe_is_buffer_referenced; llvmpipe_init_query_funcs( llvmpipe ); + llvmpipe_init_context_resource_funcs( &llvmpipe->pipe ); /* * Create drawing context and plug our rendering stage into it. */ llvmpipe->draw = draw_create(&llvmpipe->pipe); - if (!llvmpipe->draw) + if (!llvmpipe->draw) goto fail; /* FIXME: devise alternative to draw_texture_samplers */ @@ -191,6 +175,11 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe); + draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe); + + /* convert points and lines into triangles: */ + draw_wide_point_threshold(llvmpipe->draw, 0.0); + draw_wide_line_threshold(llvmpipe->draw, 0.0); #if USE_DRAW_STAGE_PSTIPPLE /* Do polygon stipple w/ texture map + frag prog? 
*/ diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 955c7eb8e0..4848101ffb 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -45,7 +45,8 @@ struct draw_stage; struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; -struct setup_context; +struct lp_setup_context; +struct lp_velems_state; struct llvmpipe_context { struct pipe_context pipe; /**< base class */ @@ -58,26 +59,25 @@ struct llvmpipe_context { const struct pipe_rasterizer_state *rasterizer; struct lp_fragment_shader *fs; const struct lp_vertex_shader *vs; + const struct lp_velems_state *velems; /** Other rendering state */ struct pipe_blend_color blend_color; struct pipe_stencil_ref stencil_ref; struct pipe_clip_state clip; - struct pipe_buffer *constants[PIPE_SHADER_TYPES]; + struct pipe_resource *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; - struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; - struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned num_samplers; - unsigned num_textures; + unsigned num_fragment_sampler_views; unsigned num_vertex_samplers; - unsigned num_vertex_textures; - unsigned num_vertex_elements; + unsigned num_vertex_sampler_views; unsigned num_vertex_buffers; unsigned dirty; /**< Mask of LP_NEW_x flags */ @@ -98,7 +98,7 @@ struct llvmpipe_context { int psize_slot; /** The tiling engine */ - struct setup_context *setup; + struct lp_setup_context *setup; /** The primitive drawing context */ struct draw_context *draw; diff --git 
a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 3dd68d5794..0b63e1c889 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -35,7 +35,6 @@ #include "pipe/p_context.h" #include "util/u_prim.h" -#include "lp_buffer.h" #include "lp_context.h" #include "lp_state.h" @@ -47,7 +46,7 @@ void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); + llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count); } @@ -58,8 +57,9 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, */ void llvmpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, + struct pipe_resource *indexBuffer, unsigned indexSize, + int indexBias, unsigned min_index, unsigned max_index, unsigned mode, unsigned start, unsigned count) @@ -75,21 +75,21 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, * Map vertex buffers */ for (i = 0; i < lp->num_vertex_buffers; i++) { - void *buf = llvmpipe_buffer(lp->vertex_buffer[i].buffer)->data; + void *buf = llvmpipe_resource_data(lp->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes = llvmpipe_buffer(indexBuffer)->data; - draw_set_mapped_element_buffer_range(draw, indexSize, + void *mapped_indexes = llvmpipe_resource_data(indexBuffer); + draw_set_mapped_element_buffer_range(draw, indexSize, indexBias, min_index, max_index, mapped_indexes); } else { /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, start, + draw_set_mapped_element_buffer_range(draw, 0, 0, start, start + count - 1, NULL); } @@ -103,7 +103,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, NULL); } if (indexBuffer) { - draw_set_mapped_element_buffer(draw, 0, 
NULL); + draw_set_mapped_element_buffer(draw, 0, 0, NULL); } /* @@ -117,12 +117,13 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, void llvmpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, + struct pipe_resource *indexBuffer, unsigned indexSize, + int indexBias, unsigned mode, unsigned start, unsigned count) { llvmpipe_draw_range_elements( pipe, indexBuffer, - indexSize, + indexSize, indexBias, 0, 0xffffffff, mode, start, count ); } diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index 525c117f31..75d8d2b825 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -29,9 +29,19 @@ #include "pipe/p_screen.h" #include "util/u_memory.h" #include "util/u_inlines.h" +#include "lp_debug.h" #include "lp_fence.h" +/** + * Create a new fence object. + * + * The rank will be the number of bins in the scene. Whenever a rendering + * thread hits a fence command, it'll increment the fence counter. When + * the counter == the rank, the fence is finished. + * + * \param rank the expected finished value of the fence counter. + */ struct lp_fence * lp_fence_create(unsigned rank) { @@ -48,6 +58,7 @@ lp_fence_create(unsigned rank) } +/** Destroy a fence. Called when refcount hits zero. */ static void lp_fence_destroy(struct lp_fence *fence) { @@ -57,6 +68,10 @@ lp_fence_destroy(struct lp_fence *fence) } +/** + * For reference counting. + * This is a Gallium API function. + */ static void llvmpipe_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, @@ -71,6 +86,10 @@ llvmpipe_fence_reference(struct pipe_screen *screen, } +/** + * Has the fence been executed/finished? + * This is a Gallium API function. + */ static int llvmpipe_fence_signalled(struct pipe_screen *screen, struct pipe_fence_handle *fence, @@ -82,6 +101,10 @@ llvmpipe_fence_signalled(struct pipe_screen *screen, } +/** + * Wait for the fence to finish. 
+ * This is a Gallium API function. + */ static int llvmpipe_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence_handle, @@ -99,6 +122,25 @@ llvmpipe_fence_finish(struct pipe_screen *screen, } +/** + * Called by the rendering threads to increment the fence counter. + * When the counter == the rank, the fence is finished. + */ +void +lp_fence_signal(struct lp_fence *fence) +{ + pipe_mutex_lock(fence->mutex); + + fence->count++; + assert(fence->count <= fence->rank); + + LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__, + fence->count, fence->rank); + + pipe_condvar_signal(fence->signalled); + + pipe_mutex_unlock(fence->mutex); +} void diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h index c90e6de423..d9270f5784 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.h +++ b/src/gallium/drivers/llvmpipe/lp_fence.h @@ -54,6 +54,10 @@ lp_fence_create(unsigned rank); void +lp_fence_signal(struct lp_fence *fence); + + +void llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen); diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index bf832433be..3627dbd759 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -37,6 +37,10 @@ #include "lp_setup.h" +/** + * \param flags bitmask of PIPE_FLUSH_x flags + * \param fence if non-null, returns pointer to a fence which can be waited on + */ void llvmpipe_flush( struct pipe_context *pipe, unsigned flags, @@ -60,14 +64,10 @@ llvmpipe_flush( struct pipe_context *pipe, } } - /* XXX the lp_setup_flush(flags) param is not a bool, and it's ignored - * at this time!
- */ - if (flags & PIPE_FLUSH_SWAPBUFFERS) { - lp_setup_flush( llvmpipe->setup, FALSE ); - } - else if (flags & PIPE_FLUSH_RENDER_CACHE) { - lp_setup_flush( llvmpipe->setup, TRUE ); + /* ask the setup module to flush */ + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE | + PIPE_FLUSH_TEXTURE_CACHE)) { + lp_setup_flush(llvmpipe->setup, flags); } /* Enable to dump BMPs of the color/depth buffers each frame */ @@ -79,12 +79,12 @@ llvmpipe_flush( struct pipe_context *pipe, for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); - debug_dump_surface(filename, llvmpipe->framebuffer.cbufs[i]); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]); } if (0) { util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); - debug_dump_surface(filename, llvmpipe->framebuffer.zsbuf); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); } ++frame_no; @@ -92,3 +92,67 @@ llvmpipe_flush( struct pipe_context *pipe, #endif } + +/** + * Flush context if necessary. + * + * TODO: move this logic to an auxiliary library? + * + * FIXME: We must implement DISCARD/DONTBLOCK/UNSYNCHRONIZED/etc for + * textures to avoid blocking. + */ +boolean +llvmpipe_flush_texture(struct pipe_context *pipe, + struct pipe_resource *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_flush) +{ + unsigned referenced; + + referenced = pipe->is_resource_referenced(pipe, texture, face, level); + + if ((referenced & PIPE_REFERENCED_FOR_WRITE) || + ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { + + if (do_not_flush) + return FALSE; + + /* + * TODO: The semantics of these flush flags are too obtuse. They should + * disappear and the pipe driver should just ensure that all visible + * side-effects happen when they need to happen. 
+ */ + if (referenced & PIPE_REFERENCED_FOR_WRITE) + flush_flags |= PIPE_FLUSH_RENDER_CACHE; + + if (referenced & PIPE_REFERENCED_FOR_READ) + flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + + if (cpu_access) { + /* + * Flush and wait. + */ + + struct pipe_fence_handle *fence = NULL; + + pipe->flush(pipe, flush_flags, &fence); + + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + } else { + /* + * Just flush. + */ + + pipe->flush(pipe, flush_flags, NULL); + } + } + + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 10b2b52583..2375d22b85 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -28,10 +28,22 @@ #ifndef LP_FLUSH_H #define LP_FLUSH_H +#include "pipe/p_compiler.h" + struct pipe_context; struct pipe_fence_handle; void llvmpipe_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence); +boolean +llvmpipe_flush_texture(struct pipe_context *pipe, + struct pipe_resource *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_flush); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index bacff500d6..8690941a50 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -51,14 +51,19 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_texture */ { - LLVMTypeRef elem_types[6]; + LLVMTypeRef elem_types[LP_JIT_TEXTURE_NUM_FIELDS]; elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); elem_types[LP_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); - elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); - elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + 
elem_types[LP_JIT_TEXTURE_ROW_STRIDE] = + LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_LEVELS); + elem_types[LP_JIT_TEXTURE_IMG_STRIDE] = + LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_LEVELS); + elem_types[LP_JIT_TEXTURE_DATA] = + LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), + LP_MAX_TEXTURE_LEVELS); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -74,9 +79,12 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level, screen->target, texture_type, LP_JIT_TEXTURE_LAST_LEVEL); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride, + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, row_stride, screen->target, texture_type, - LP_JIT_TEXTURE_STRIDE); + LP_JIT_TEXTURE_ROW_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, img_stride, + screen->target, texture_type, + LP_JIT_TEXTURE_IMG_STRIDE); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, screen->target, texture_type, LP_JIT_TEXTURE_DATA); @@ -88,36 +96,53 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_context */ { - LLVMTypeRef elem_types[8]; + LLVMTypeRef elem_types[LP_JIT_CTX_COUNT]; LLVMTypeRef context_type; - elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ - elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ elem_types[2] = LLVMFloatType(); /* scissor_xmin */ - elem_types[3] = LLVMFloatType(); /* scissor_ymin */ - elem_types[4] = LLVMFloatType(); /* scissor_xmax */ - elem_types[5] = LLVMFloatType(); /* scissor_ymax */ - elem_types[6] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ - elem_types[7] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[LP_JIT_CTX_CONSTANTS] = LLVMPointerType(LLVMFloatType(), 0); + elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType(); + elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type(); + elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type(); + elem_types[LP_JIT_CTX_SCISSOR_XMIN] = LLVMFloatType(); + 
elem_types[LP_JIT_CTX_SCISSOR_YMIN] = LLVMFloatType(); + elem_types[LP_JIT_CTX_SCISSOR_XMAX] = LLVMFloatType(); + elem_types[LP_JIT_CTX_SCISSOR_YMAX] = LLVMFloatType(); + elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0); + elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, + PIPE_MAX_SAMPLERS); context_type = LLVMStructType(elem_types, Elements(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, - screen->target, context_type, 0); + screen->target, context_type, + LP_JIT_CTX_CONSTANTS); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, - screen->target, context_type, 1); + screen->target, context_type, + LP_JIT_CTX_ALPHA_REF); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_front, + screen->target, context_type, + LP_JIT_CTX_STENCIL_REF_FRONT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back, + screen->target, context_type, + LP_JIT_CTX_STENCIL_REF_BACK); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin, - screen->target, context_type, 2); + screen->target, context_type, + LP_JIT_CTX_SCISSOR_XMIN); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin, - screen->target, context_type, 3); + screen->target, context_type, + LP_JIT_CTX_SCISSOR_YMIN); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax, - screen->target, context_type, 4); + screen->target, context_type, + LP_JIT_CTX_SCISSOR_XMAX); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax, - screen->target, context_type, 5); + screen->target, context_type, + LP_JIT_CTX_SCISSOR_YMAX); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, - screen->target, context_type, 6); + screen->target, context_type, + LP_JIT_CTX_BLEND_COLOR); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, screen->target, context_type, - LP_JIT_CONTEXT_TEXTURES_INDEX); + LP_JIT_CTX_TEXTURES); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, screen->target, context_type); @@ -146,8 +171,6 @@ 
lp_jit_screen_cleanup(struct llvmpipe_screen *screen) void lp_jit_screen_init(struct llvmpipe_screen *screen) { - char *error = NULL; - util_cpu_detect(); #if 0 @@ -159,17 +182,10 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) lp_build_init(); - screen->module = LLVMModuleCreateWithName("llvmpipe"); - - screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module); - - if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { - _debug_printf("%s\n", error); - LLVMDisposeMessage(error); - assert(0); - } - - screen->target = LLVMGetExecutionEngineTargetData(screen->engine); + screen->module = lp_build_module; + screen->provider = lp_build_provider; + screen->engine = lp_build_engine; + screen->target = lp_build_target; screen->pass = LLVMCreateFunctionPassManager(screen->provider); LLVMAddTargetData(screen->target, screen->pass); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 0ebb2826fa..5d0268c68c 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -39,6 +39,7 @@ #include "gallivm/lp_bld_struct.h" #include "pipe/p_state.h" +#include "lp_texture.h" struct llvmpipe_screen; @@ -50,8 +51,9 @@ struct lp_jit_texture uint32_t height; uint32_t depth; uint32_t last_level; - uint32_t stride; - const void *data; + uint32_t row_stride[LP_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[LP_MAX_TEXTURE_LEVELS]; + const void *data[LP_MAX_TEXTURE_LEVELS]; }; @@ -60,8 +62,10 @@ enum { LP_JIT_TEXTURE_HEIGHT, LP_JIT_TEXTURE_DEPTH, LP_JIT_TEXTURE_LAST_LEVEL, - LP_JIT_TEXTURE_STRIDE, - LP_JIT_TEXTURE_DATA + LP_JIT_TEXTURE_ROW_STRIDE, + LP_JIT_TEXTURE_IMG_STRIDE, + LP_JIT_TEXTURE_DATA, + LP_JIT_TEXTURE_NUM_FIELDS /* number of fields above */ }; @@ -83,6 +87,8 @@ struct lp_jit_context float alpha_ref_value; + uint32_t stencil_ref_front, stencil_ref_back; + /** floats, not ints */ float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax; @@ -93,37 +99,66 @@ 
struct lp_jit_context }; +/** + * These enum values must match the position of the fields in the + * lp_jit_context struct above. + */ +enum { + LP_JIT_CTX_CONSTANTS = 0, + LP_JIT_CTX_ALPHA_REF, + LP_JIT_CTX_STENCIL_REF_FRONT, + LP_JIT_CTX_STENCIL_REF_BACK, + LP_JIT_CTX_SCISSOR_XMIN, + LP_JIT_CTX_SCISSOR_YMIN, + LP_JIT_CTX_SCISSOR_XMAX, + LP_JIT_CTX_SCISSOR_YMAX, + LP_JIT_CTX_BLEND_COLOR, + LP_JIT_CTX_TEXTURES, + LP_JIT_CTX_COUNT +}; + + #define lp_jit_context_constants(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 0, "constants") + lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_CONSTANTS, "constants") #define lp_jit_context_alpha_ref_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value") + lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_ALPHA_REF, "alpha_ref_value") + +#define lp_jit_context_stencil_ref_front_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_FRONT, "stencil_ref_front") + +#define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back") #define lp_jit_context_scissor_xmin_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "scissor_xmin") + lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMIN, "scissor_xmin") #define lp_jit_context_scissor_ymin_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 3, "scissor_ymin") + lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMIN, "scissor_ymin") #define lp_jit_context_scissor_xmax_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 4, "scissor_xmax") + lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMAX, "scissor_xmax") #define lp_jit_context_scissor_ymax_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 5, "scissor_ymax") + lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMAX, "scissor_ymax") #define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, 
_ptr, 6, "blend_color") - -#define LP_JIT_CONTEXT_TEXTURES_INDEX 7 + lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color") #define lp_jit_context_textures(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") + lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES, "textures") + + +/** Indexes into jit_function[] array */ +#define RAST_WHOLE 0 +#define RAST_EDGE_TEST 1 typedef void (*lp_jit_frag_func)(const struct lp_jit_context *context, uint32_t x, uint32_t y, + float facing, const void *a0, const void *dadx, const void *dady, diff --git a/src/gallium/drivers/llvmpipe/lp_public.h b/src/gallium/drivers/llvmpipe/lp_public.h new file mode 100644 index 0000000000..ec6b660b48 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_public.h @@ -0,0 +1,10 @@ +#ifndef LP_PUBLIC_H +#define LP_PUBLIC_H + +struct pipe_screen; +struct sw_winsys; + +struct pipe_screen * +llvmpipe_create_screen(struct sw_winsys *winsys); + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index dd9a8e8856..0a41b6406b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -42,15 +42,15 @@ #include "lp_scene.h" -/* Begin rasterizing a scene: +/** + * Begin rasterizing a scene. + * Called once per scene by one thread. 
*/ -static boolean +static void lp_rast_begin( struct lp_rasterizer *rast, struct lp_scene *scene ) { const struct pipe_framebuffer_state *fb = &scene->fb; - boolean write_color = fb->nr_cbufs != 0; - boolean write_zstencil = fb->zsbuf != NULL; int i; rast->curr_scene = scene; @@ -58,57 +58,147 @@ lp_rast_begin( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); rast->state.nr_cbufs = scene->fb.nr_cbufs; - rast->state.write_zstencil = write_zstencil; - rast->state.write_color = write_color; for (i = 0; i < rast->state.nr_cbufs; i++) { - rast->cbuf[i].map = scene->cbuf_map[i]; - rast->cbuf[i].format = scene->cbuf_transfer[i]->texture->format; - rast->cbuf[i].width = scene->cbuf_transfer[i]->width; - rast->cbuf[i].height = scene->cbuf_transfer[i]->height; - rast->cbuf[i].stride = scene->cbuf_transfer[i]->stride; + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + rast->cbuf[i].format = cbuf->texture->format; + rast->cbuf[i].tiles_per_row = align(cbuf->width, TILE_SIZE) / TILE_SIZE; + rast->cbuf[i].blocksize = + util_format_get_blocksize(cbuf->texture->format); + rast->cbuf[i].map = llvmpipe_resource_map(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_LAYOUT_NONE); } - if (write_zstencil) { - rast->zsbuf.map = scene->zsbuf_map; - rast->zsbuf.stride = scene->zsbuf_transfer->stride; + if (fb->zsbuf) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + rast->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level); rast->zsbuf.blocksize = - util_format_get_blocksize(scene->zsbuf_transfer->texture->format); + util_format_get_blocksize(zsbuf->texture->format); + + rast->zsbuf.map = llvmpipe_resource_map(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_LAYOUT_NONE); + assert(rast->zsbuf.map); } lp_scene_bin_iter_begin( scene ); - - return TRUE; } static void lp_rast_end( struct lp_rasterizer *rast ) { - int i; - - lp_scene_reset( rast->curr_scene 
); + struct lp_scene *scene = rast->curr_scene; + unsigned i; - for (i = 0; i < rast->state.nr_cbufs; i++) + /* Unmap color buffers */ + for (i = 0; i < rast->state.nr_cbufs; i++) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + llvmpipe_resource_unmap(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice); rast->cbuf[i].map = NULL; + } + + /* Unmap z/stencil buffer */ + if (rast->zsbuf.map) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + llvmpipe_resource_unmap(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice); + rast->zsbuf.map = NULL; + } + + lp_scene_reset( rast->curr_scene ); - rast->zsbuf.map = NULL; rast->curr_scene = NULL; + + if (0) + debug_printf("Post render scene: tile read: %d tile write: %d\n", + tile_read_count, tile_write_count); } + /** * Begining rasterization of a tile. * \param x window X position of the tile, in pixels * \param y window Y position of the tile, in pixels */ static void -lp_rast_start_tile(struct lp_rasterizer_task *task, +lp_rast_tile_begin(struct lp_rasterizer_task *task, unsigned x, unsigned y) { + struct lp_rasterizer *rast = task->rast; + struct lp_scene *scene = rast->curr_scene; + enum lp_texture_usage usage; + unsigned buf; + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + task->x = x; task->y = y; + + if (scene->has_color_clear) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + /* get pointers to color tile(s) */ + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; + struct llvmpipe_resource *lpt; + assert(cbuf); + lpt = llvmpipe_resource(cbuf->texture); + task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, + cbuf->face + cbuf->zslice, + cbuf->level, + usage, + x, y); + assert(task->color_tiles[buf]); + } + + /* get pointer to depth/stencil tile */ + { + struct pipe_surface *zsbuf = rast->curr_scene->fb.zsbuf; + if (zsbuf) { + 
struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture); + + if (scene->has_depth_clear) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + /* "prime" the tile: convert data from linear to tiled if necessary + * and update the tile's layout info. + */ + (void) llvmpipe_get_texture_tile(lpt, + zsbuf->face + zsbuf->zslice, + zsbuf->level, + usage, + x, y); + /* Get actual pointer to the tile data. Note that depth/stencil + * data is tiled differently than color data. + */ + task->depth_tile = lp_rast_get_depth_block_pointer(rast, x, y); + + assert(task->depth_tile); + } + else { + task->depth_tile = NULL; + } + } } @@ -122,7 +212,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, { struct lp_rasterizer *rast = task->rast; const uint8_t *clear_color = arg.clear_color; - uint8_t **color_tile = task->tile.color; + unsigned i; LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, @@ -136,7 +226,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, clear_color[2] == clear_color[3]) { /* clear to grayscale value {x, x, x, x} */ for (i = 0; i < rast->state.nr_cbufs; i++) { - memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); + uint8_t *ptr = task->color_tiles[i]; + memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } else { @@ -147,8 +238,9 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, */ const unsigned chunk = TILE_SIZE / 4; for (i = 0; i < rast->state.nr_cbufs; i++) { - uint8_t *c = color_tile[i]; + uint8_t *c = task->color_tiles[i]; unsigned j; + for (j = 0; j < 4 * TILE_SIZE; j++) { memset(c, clear_color[0], chunk); c += chunk; @@ -159,7 +251,6 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, memset(c, clear_color[3], chunk); c += chunk; } - assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4); } } @@ -176,23 +267,15 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { struct lp_rasterizer *rast = task->rast; - const unsigned tile_x 
= task->x; - const unsigned tile_y = task->y; const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; - unsigned block_size = rast->zsbuf.blocksize; + const unsigned block_size = rast->zsbuf.blocksize; + const unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT; uint8_t *dst; - unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT; unsigned i, j; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); - assert(rast->zsbuf.map); - if (!rast->zsbuf.map) - return; - - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - /* * Clear the aera of the swizzled depth/depth buffer matching this tile, in * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time. @@ -201,7 +284,9 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets. */ - dst = lp_rast_depth_pointer(rast, tile_x, tile_y); + dst = task->depth_tile; + + assert(dst == lp_rast_get_depth_block_pointer(rast, task->x, task->y)); switch (block_size) { case 1: @@ -234,32 +319,73 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, * Load tile color from the framebuffer surface. * This is a bin command called during bin processing. */ +#if 0 void lp_rast_load_color(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { struct lp_rasterizer *rast = task->rast; - const unsigned x = task->x, y = task->y; - unsigned i; + unsigned buf; + enum lp_texture_usage usage; LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - for (i = 0; i < rast->state.nr_cbufs; i++) { - if (x >= rast->cbuf[i].width || y >= rast->cbuf[i].height) - continue; + if (scene->has_color_clear) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + /* Get pointers to color tile(s). + * This will convert linear data to tiled if needed. 
+ */ + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; + struct llvmpipe_texture *lpt; + assert(cbuf); + lpt = llvmpipe_texture(cbuf->texture); + task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, + cbuf->face + cbuf->zslice, + cbuf->level, + usage, + task->x, task->y); + assert(task->color_tiles[buf]); + } +} +#endif - lp_tile_read_4ub(rast->cbuf[i].format, - task->tile.color[i], - rast->cbuf[i].map, - rast->cbuf[i].stride, - x, y, - TILE_SIZE, TILE_SIZE); - LP_COUNT(nr_color_tile_load); +/** + * Convert the color tile from tiled to linear layout. + * This is generally only done when we're flushing the scene just prior to + * SwapBuffers. If we didn't do this here, we'd have to convert the entire + * tiled color buffer to linear layout in the llvmpipe_texture_unmap() + * function. It's better to do it here to take advantage of + * threading/parallelism. + * This is a bin command which is stored in all bins. + */ +void +lp_rast_store_color( struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + struct lp_rasterizer *rast = task->rast; + struct lp_scene *scene = rast->curr_scene; + unsigned buf; + + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + struct pipe_surface *cbuf = scene->fb.cbufs[buf]; + const unsigned face = cbuf->face, level = cbuf->level; + struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); + /* this will convert the tiled data to linear if needed */ + (void) llvmpipe_get_texture_tile_linear(lpt, face, level, + LP_TEX_USAGE_READ, + task->x, task->y); } } +/** + * This is a bin command called during bin processing. + */ void lp_rast_set_state(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) @@ -273,7 +399,6 @@ lp_rast_set_state(struct lp_rasterizer_task *task, } - /** * Run the shader on all blocks in a tile. This is used when a tile is * completely contained inside a triangle. 
@@ -285,7 +410,6 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, { struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; - struct lp_rast_tile *tile = &task->tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -297,28 +421,27 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, for (x = 0; x < TILE_SIZE; x += 4) { uint8_t *color[PIPE_MAX_COLOR_BUFS]; uint32_t *depth; - unsigned block_offset, i; - - /* offset of the 16x16 pixel block within the tile */ - block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16); + unsigned i; /* color buffer */ for (i = 0; i < rast->state.nr_cbufs; i++) - color[i] = tile->color[i] + 4 * block_offset; + color[i] = lp_rast_get_color_block_pointer(task, i, + tile_x + x, tile_y + y); /* depth buffer */ - depth = lp_rast_depth_pointer(rast, tile_x + x, tile_y + y); - - /* run shader */ - state->jit_function[0]( &state->jit_context, - tile_x + x, tile_y + y, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL ); + depth = lp_rast_get_depth_block_pointer(rast, tile_x + x, tile_y + y); + + /* run shader on 4x4 block */ + state->jit_function[RAST_WHOLE]( &state->jit_context, + tile_x + x, tile_y + y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); } } } @@ -327,6 +450,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, /** * Compute shading for a 4x4 block of pixels. * This is a bin command called during bin processing. 
+ * \param x X position of quad in window coords + * \param y Y position of quad in window coords */ void lp_rast_shade_quads( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, @@ -335,12 +460,9 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, { const struct lp_rast_state *state = task->current_state; struct lp_rasterizer *rast = task->rast; - struct lp_rast_tile *tile = &task->tile; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; unsigned i; - unsigned ix, iy; - int block_offset; assert(state); @@ -351,37 +473,35 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, assert((x % 4) == 0); assert((y % 4) == 0); - ix = x % TILE_SIZE; - iy = y % TILE_SIZE; - - /* offset of the 16x16 pixel block within the tile */ - block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16); - /* color buffer */ - for (i = 0; i < rast->state.nr_cbufs; i++) - color[i] = tile->color[i] + 4 * block_offset; + for (i = 0; i < rast->state.nr_cbufs; i++) { + color[i] = lp_rast_get_color_block_pointer(task, i, x, y); + assert(lp_check_alignment(color[i], 16)); + } /* depth buffer */ - depth = lp_rast_depth_pointer(rast, x, y); + depth = lp_rast_get_depth_block_pointer(rast, x, y); - assert(lp_check_alignment(tile->color[0], 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); assert(lp_check_alignment(inputs->step[0], 16)); assert(lp_check_alignment(inputs->step[1], 16)); assert(lp_check_alignment(inputs->step[2], 16)); - /* run shader */ - state->jit_function[1]( &state->jit_context, - x, y, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - c1, c2, c3, - inputs->step[0], inputs->step[1], inputs->step[2]); + /* run shader on 4x4 block */ + state->jit_function[RAST_EDGE_TEST]( &state->jit_context, + x, y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + c1, c2, c3, + inputs->step[0], + inputs->step[1], + inputs->step[2]); } @@ -439,39 +559,31 @@ outline_subtiles(uint8_t *tile) /** - 
* Write the rasterizer's color tile to the framebuffer. + * Called when we're done writing to a color tile. */ static void -lp_rast_store_color(struct lp_rasterizer_task *task) +lp_rast_tile_end(struct lp_rasterizer_task *task) { +#if DEBUG struct lp_rasterizer *rast = task->rast; - const unsigned x = task->x, y = task->y; - unsigned i; - - for (i = 0; i < rast->state.nr_cbufs; i++) { - if (x >= rast->cbuf[i].width) - continue; + unsigned buf; - if (y >= rast->cbuf[i].height) - continue; - - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d\n", __FUNCTION__, - task->thread_index, x, y); + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + uint8_t *color = lp_rast_get_color_block_pointer(task, buf, + task->x, task->y); if (LP_DEBUG & DEBUG_SHOW_SUBTILES) - outline_subtiles(task->tile.color[i]); + outline_subtiles(color); else if (LP_DEBUG & DEBUG_SHOW_TILES) - outline_tile(task->tile.color[i]); - - lp_tile_write_4ub(rast->cbuf[i].format, - task->tile.color[i], - rast->cbuf[i].map, - rast->cbuf[i].stride, - x, y, - TILE_SIZE, TILE_SIZE); - - LP_COUNT(nr_color_tile_store); + outline_tile(color); } +#else + (void) outline_subtiles; +#endif + + /* debug */ + memset(task->color_tiles, 0, sizeof(task->color_tiles)); + task->depth_tile = NULL; } @@ -485,18 +597,7 @@ lp_rast_fence(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { struct lp_fence *fence = arg.fence; - - pipe_mutex_lock( fence->mutex ); - - fence->count++; - assert(fence->count <= fence->rank); - - LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__, - fence->count, fence->rank); - - pipe_condvar_signal( fence->signalled ); - - pipe_mutex_unlock( fence->mutex ); + lp_fence_signal(fence); } @@ -517,7 +618,7 @@ rasterize_bin(struct lp_rasterizer_task *task, struct cmd_block *block; unsigned k; - lp_rast_start_tile( task, x * TILE_SIZE, y * TILE_SIZE ); + lp_rast_tile_begin( task, x * TILE_SIZE, y * TILE_SIZE ); /* simply execute each of the commands in the block list */ for (block = commands->head; 
block; block = block->next) { @@ -526,10 +627,7 @@ rasterize_bin(struct lp_rasterizer_task *task, } } - /* Write the rasterizer's tiles to the framebuffer. - */ - if (task->rast->state.write_color) - lp_rast_store_color(task); + lp_rast_tile_end(task); /* Free data for this bin. */ @@ -544,12 +642,12 @@ static struct { const char *name; } cmd_names[] = { - RAST(load_color), RAST(clear_color), RAST(clear_zstencil), RAST(triangle), RAST(shade_tile), RAST(set_state), + RAST(store_color), RAST(fence), }; @@ -602,8 +700,7 @@ is_empty_bin( const struct cmd_bin *bin ) } for (i = 0; i < head->count; i++) - if (head->cmd[i] != lp_rast_load_color && - head->cmd[i] != lp_rast_set_state) { + if (head->cmd[i] != lp_rast_set_state) { return FALSE; } @@ -663,6 +760,9 @@ lp_rast_queue_scene( struct lp_rasterizer *rast, rasterize_scene( &rast->tasks[0], scene ); lp_scene_reset( scene ); + + lp_rast_end( rast ); + rast->curr_scene = NULL; } else { @@ -774,7 +874,11 @@ create_rast_threads(struct lp_rasterizer *rast) * properly implemented. 
*/ rast->num_threads = 0; #else +#ifdef PIPE_OS_EMBEDDED + rast->num_threads = 0; +#else rast->num_threads = util_cpu_caps.nr_cpus; +#endif rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads); rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); #endif @@ -799,7 +903,7 @@ struct lp_rasterizer * lp_rast_create( void ) { struct lp_rasterizer *rast; - unsigned i, cbuf; + unsigned i; rast = CALLOC_STRUCT(lp_rasterizer); if(!rast) @@ -809,10 +913,6 @@ lp_rast_create( void ) for (i = 0; i < Elements(rast->tasks); i++) { struct lp_rasterizer_task *task = &rast->tasks[i]; - - for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) - task->tile.color[cbuf] = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); - task->rast = rast; task->thread_index = i; } @@ -830,12 +930,7 @@ lp_rast_create( void ) */ void lp_rast_destroy( struct lp_rasterizer *rast ) { - unsigned i, cbuf; - - for (i = 0; i < Elements(rast->tasks); i++) { - for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) - align_free(rast->tasks[i].tile.color[cbuf]); - } + unsigned i; /* Set exit_flag and signal each thread's work_ready semaphore. * Each thread will be woken up, notice that the exit_flag is set and diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index dc5fc5fc7d..a0ecb2fc47 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -82,6 +82,8 @@ struct lp_rast_state { * These pointers point into the bin data buffer. */ struct lp_rast_shader_inputs { + float facing; /** Positive for front-facing, negative for back-facing */ + float (*a0)[4]; float (*dadx)[4]; float (*dady)[4]; @@ -95,7 +97,7 @@ struct lp_rast_shader_inputs { * Rasterization information for a triangle known to be in this bin, * plus inputs to run the shader: * These fields are tile- and bin-independent. - * Objects of this type are put into the setup_context::data buffer. 
+ * Objects of this type are put into the lp_setup_context::data buffer. */ struct lp_rast_triangle { #ifdef DEBUG @@ -215,9 +217,6 @@ void lp_rast_clear_color( struct lp_rasterizer_task *, void lp_rast_clear_zstencil( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_load_color( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - void lp_rast_set_state( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); @@ -230,4 +229,8 @@ void lp_rast_shade_tile( struct lp_rasterizer_task *, void lp_rast_fence( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_store_color( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 39bf2c2587..8bf2b92a6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -32,6 +32,8 @@ #include "util/u_format.h" #include "gallivm/lp_bld_debug.h" #include "lp_rast.h" +#include "lp_scene.h" +#include "lp_texture.h" #include "lp_tile_soa.h" @@ -42,24 +44,15 @@ struct lp_rasterizer; /** - * A tile's color and depth memory. - * We can choose whatever layout for the internal tile storage we prefer. 
- */ -struct lp_rast_tile -{ - uint8_t *color[PIPE_MAX_COLOR_BUFS]; -}; - - -/** * Per-thread rasterization state */ struct lp_rasterizer_task { - struct lp_rast_tile tile; /** Tile color/z/stencil memory */ - unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ + uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS]; + uint8_t *depth_tile; + const struct lp_rast_state *current_state; /** "back" pointer */ @@ -86,9 +79,8 @@ struct lp_rasterizer */ struct { void *map; - unsigned stride; - unsigned width; - unsigned height; + unsigned tiles_per_row; + unsigned blocksize; enum pipe_format format; } cbuf[PIPE_MAX_COLOR_BUFS]; @@ -100,8 +92,6 @@ struct lp_rasterizer struct { unsigned nr_cbufs; - boolean write_color; - boolean write_zstencil; unsigned clear_color; unsigned clear_depth; char clear_stencil; @@ -140,18 +130,23 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, /** - * Get the pointer to the depth buffer for a block. + * Get the pointer to a 4x4 depth/stencil block. + * We'll map the z/stencil buffer on demand here. + * Note that this may be called even when there's no z/stencil buffer - return + * NULL in that case. * \param x, y location of 4x4 block in window coords */ static INLINE void * -lp_rast_depth_pointer( struct lp_rasterizer *rast, - unsigned x, unsigned y ) +lp_rast_get_depth_block_pointer(const struct lp_rasterizer *rast, + unsigned x, unsigned y) { - void * depth; + void *depth; assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); + assert(rast->zsbuf.map || !rast->curr_scene->fb.zsbuf); + if (!rast->zsbuf.map) return NULL; @@ -164,6 +159,37 @@ lp_rast_depth_pointer( struct lp_rasterizer *rast, } +/** + * Get the pointer to a 4x4 color block (within a 64x64 tile). + * We'll map the color buffer on demand here. + * Note that this may be called even when there's no color buffers - return + * NULL in that case. 
+ * \param x, y location of 4x4 block in window coords + */ +static INLINE uint8_t * +lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, + unsigned buf, unsigned x, unsigned y) +{ + unsigned px, py, pixel_offset; + uint8_t *color; + + assert((x % TILE_VECTOR_WIDTH) == 0); + assert((y % TILE_VECTOR_HEIGHT) == 0); + + color = task->color_tiles[buf]; + assert(color); + + px = x % TILE_SIZE; + py = y % TILE_SIZE; + pixel_offset = tile_pixel_offset(px, py, 0); + + color = color + pixel_offset; + + assert(lp_check_alignment(color, 16)); + return color; +} + + /** * Shade all pixels in a 4x4 block. The fragment code omits the @@ -177,31 +203,27 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, { struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; - struct lp_rast_tile *tile = &task->tile; - const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; - unsigned block_offset, i; - - /* offset of the containing 16x16 pixel block within the tile */ - block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16; + unsigned i; /* color buffer */ for (i = 0; i < rast->state.nr_cbufs; i++) - color[i] = tile->color[i] + 4 * block_offset; - - depth = lp_rast_depth_pointer(rast, x, y); - - /* run shader */ - state->jit_function[0]( &state->jit_context, - x, y, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL ); + color[i] = lp_rast_get_color_block_pointer(task, i, x, y); + + depth = lp_rast_get_depth_block_pointer(rast, x, y); + + /* run shader on 4x4 block */ + state->jit_function[RAST_WHOLE]( &state->jit_context, + x, y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 72492c0f0c..182e7cb230 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -181,11 +181,14 @@ lp_scene_reset(struct lp_scene *scene ) struct texture_ref *ref, *next, *ref_list = &scene->textures; for (ref = ref_list->next; ref != ref_list; ref = next) { next = next_elem(ref); - pipe_texture_reference(&ref->texture, NULL); + pipe_resource_reference(&ref->texture, NULL); FREE(ref); } make_empty_list(ref_list); } + + scene->has_color_clear = FALSE; + scene->has_depth_clear = FALSE; } @@ -248,12 +251,12 @@ lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) */ void lp_scene_texture_reference( struct lp_scene *scene, - struct pipe_texture *texture ) + struct pipe_resource *texture ) { struct texture_ref *ref = CALLOC_STRUCT(texture_ref); if (ref) { struct texture_ref *ref_list = &scene->textures; - pipe_texture_reference(&ref->texture, texture); + pipe_resource_reference(&ref->texture, texture); insert_at_tail(ref_list, ref); } } @@ -263,8 +266,8 @@ lp_scene_texture_reference( struct lp_scene *scene, * Does this scene have a reference to the given texture? */ boolean -lp_scene_is_texture_referenced( const struct lp_scene *scene, - const struct pipe_texture *texture ) +lp_scene_is_resource_referenced( const struct lp_scene *scene, + const struct pipe_resource *texture ) { const struct texture_ref *ref_list = &scene->textures; const struct texture_ref *ref; @@ -390,6 +393,7 @@ end: } + /** * Prepare this scene for the rasterizer. * Map the framebuffer surfaces. Initialize the 'rast' state. 
@@ -397,65 +401,12 @@ end: static boolean lp_scene_map_buffers( struct lp_scene *scene ) { - struct pipe_screen *screen = scene->pipe->screen; - struct pipe_surface *cbuf, *zsbuf; - int i; - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - - /* Map all color buffers - */ - for (i = 0; i < scene->fb.nr_cbufs; i++) { - cbuf = scene->fb.cbufs[i]; - if (cbuf) { - scene->cbuf_transfer[i] = screen->get_tex_transfer(screen, - cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - cbuf->width, - cbuf->height); - if (!scene->cbuf_transfer[i]) - goto fail; - - scene->cbuf_map[i] = screen->transfer_map(screen, - scene->cbuf_transfer[i]); - if (!scene->cbuf_map[i]) - goto fail; - } - } - - /* Map the zsbuffer - */ - zsbuf = scene->fb.zsbuf; - if (zsbuf) { - scene->zsbuf_transfer = screen->get_tex_transfer(screen, - zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - zsbuf->width, - zsbuf->height); - if (!scene->zsbuf_transfer) - goto fail; - - scene->zsbuf_map = screen->transfer_map(screen, - scene->zsbuf_transfer); - if (!scene->zsbuf_map) - goto fail; - } + /* XXX framebuffer surfaces are no longer mapped here */ + /* XXX move all map/unmap stuff into rast module... */ return TRUE; - -fail: - /* Unmap and release transfers? 
- */ - return FALSE; } @@ -469,28 +420,29 @@ fail: static void lp_scene_unmap_buffers( struct lp_scene *scene ) { - struct pipe_screen *screen = scene->pipe->screen; +#if 0 unsigned i; for (i = 0; i < scene->fb.nr_cbufs; i++) { - if (scene->cbuf_map[i]) - screen->transfer_unmap(screen, scene->cbuf_transfer[i]); - - if (scene->cbuf_transfer[i]) - screen->tex_transfer_destroy(scene->cbuf_transfer[i]); - - scene->cbuf_transfer[i] = NULL; - scene->cbuf_map[i] = NULL; + if (scene->cbuf_map[i]) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + llvmpipe_resource_unmap(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice); + scene->cbuf_map[i] = NULL; + } } - if (scene->zsbuf_map) - screen->transfer_unmap(screen, scene->zsbuf_transfer); - - if (scene->zsbuf_transfer) - screen->tex_transfer_destroy(scene->zsbuf_transfer); - - scene->zsbuf_transfer = NULL; - scene->zsbuf_map = NULL; + if (scene->zsbuf_map) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + llvmpipe_resource_unmap(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice); + scene->zsbuf_map = NULL; + } +#endif util_unreference_framebuffer_state( &scene->fb ); } @@ -505,6 +457,9 @@ void lp_scene_begin_binning( struct lp_scene *scene, scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE; scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE; + + assert(scene->tiles_x <= TILES_X); + assert(scene->tiles_y <= TILES_Y); } @@ -524,7 +479,6 @@ void lp_scene_rasterize( struct lp_scene *scene, } } - scene->write_depth = (scene->fb.zsbuf != NULL && write_depth); diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 739ac22908..ac0717db6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -99,7 +99,7 @@ struct data_block_list { /** List of texture references */ struct texture_ref { - struct pipe_texture *texture; + struct pipe_resource *texture; struct texture_ref *prev, *next; /**< linked list w/ 
u_simple_list.h */ }; @@ -114,13 +114,6 @@ struct texture_ref { */ struct lp_scene { struct pipe_context *pipe; - struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; - struct pipe_transfer *zsbuf_transfer; - - /* Scene's buffers are mapped at the time the scene is enqueued: - */ - void *cbuf_map[PIPE_MAX_COLOR_BUFS]; - uint8_t *zsbuf_map; /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; @@ -129,6 +122,8 @@ struct lp_scene { struct texture_ref textures; boolean write_depth; + boolean has_color_clear; + boolean has_depth_clear; /** * Number of active tiles in each dimension. @@ -170,10 +165,10 @@ unsigned lp_scene_data_size( const struct lp_scene *scene ); unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ); void lp_scene_texture_reference( struct lp_scene *scene, - struct pipe_texture *texture ); + struct pipe_resource *texture ); -boolean lp_scene_is_texture_referenced( const struct lp_scene *scene, - const struct pipe_texture *texture ); +boolean lp_scene_is_resource_referenced( const struct lp_scene *scene, + const struct pipe_resource *texture ); /** @@ -306,6 +301,7 @@ lp_scene_bin_iter_begin( struct lp_scene *scene ); struct cmd_bin * lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); + void lp_scene_rasterize( struct lp_scene *scene, struct lp_rasterizer *rast, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index f84ede675b..7d2cd0c767 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -28,17 +28,19 @@ #include "util/u_memory.h" #include "util/u_format.h" +#include "util/u_format_s3tc.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" #include "lp_texture.h" -#include "lp_buffer.h" #include "lp_fence.h" -#include "lp_winsys.h" #include "lp_jit.h" #include "lp_screen.h" #include "lp_context.h" #include "lp_debug.h" +#include "lp_public.h" + +#include 
"state_tracker/sw_winsys.h" #ifdef DEBUG int LP_DEBUG = 0; @@ -107,11 +109,11 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; /* max 4Kx4K */ + return LP_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 9; /* max 256x256x256 */ + return LP_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; /* max 4Kx4K */ + return LP_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: @@ -167,7 +169,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, unsigned geom_flags ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - struct llvmpipe_winsys *winsys = screen->winsys; + struct sw_winsys *winsys = screen->winsys; const struct util_format_description *format_desc; format_desc = util_format_description(format); @@ -179,35 +181,25 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - switch(format) { - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return FALSE; - default: - break; - } + if(tex_usage & PIPE_BIND_RENDER_TARGET) { + if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) + return FALSE; - if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { if(format_desc->block.width != 1 || format_desc->block.height != 1) return FALSE; - if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) - return FALSE; - if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) return FALSE; } - if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { - if(!winsys->is_displaytarget_format_supported(winsys, format)) + if(tex_usage & PIPE_BIND_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) return FALSE; } - if(tex_usage & 
PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + if(tex_usage & PIPE_BIND_DEPTH_STENCIL) { if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) return FALSE; @@ -216,40 +208,20 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, return FALSE; } - /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */ - if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) { - if(format_desc->block.width != 1 || - format_desc->block.height != 1) - return FALSE; - - if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) - return FALSE; - - if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && - format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - return FALSE; - - /* not supported yet */ - if (format == PIPE_FORMAT_Z16_UNORM) - return FALSE; + switch(format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return util_format_s3tc_enabled; + default: + break; } return TRUE; } -static struct pipe_buffer * -llvmpipe_surface_buffer_create(struct pipe_screen *screen, - unsigned width, unsigned height, - enum pipe_format format, - unsigned tex_usage, - unsigned usage, - unsigned *stride) -{ - /* This function should never be used */ - assert(0); - return NULL; -} static void @@ -258,8 +230,8 @@ llvmpipe_flush_frontbuffer(struct pipe_screen *_screen, void *context_private) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - struct llvmpipe_winsys *winsys = screen->winsys; - struct llvmpipe_texture *texture = llvmpipe_texture(surface->texture); + struct sw_winsys *winsys = screen->winsys; + struct llvmpipe_resource *texture = llvmpipe_resource(surface->texture); assert(texture->dt); if (texture->dt) @@ -271,7 +243,7 @@ static void llvmpipe_destroy_screen( struct pipe_screen *_screen ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - struct llvmpipe_winsys *winsys = screen->winsys; + struct sw_winsys *winsys = screen->winsys; lp_jit_screen_cleanup(screen); @@ -288,7 +260,7 @@ 
llvmpipe_destroy_screen( struct pipe_screen *_screen ) * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). */ struct pipe_screen * -llvmpipe_create_screen(struct llvmpipe_winsys *winsys) +llvmpipe_create_screen(struct sw_winsys *winsys) { struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen); @@ -309,12 +281,12 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys) screen->base.get_paramf = llvmpipe_get_paramf; screen->base.is_format_supported = llvmpipe_is_format_supported; - screen->base.surface_buffer_create = llvmpipe_surface_buffer_create; screen->base.context_create = llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; - llvmpipe_init_screen_texture_funcs(&screen->base); - llvmpipe_init_screen_buffer_funcs(&screen->base); + util_format_s3tc_init(); + + llvmpipe_init_screen_resource_funcs(&screen->base); llvmpipe_init_screen_fence_funcs(&screen->base); lp_jit_screen_init(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index 4a1b4d6f3e..af25e043cc 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -34,23 +34,21 @@ #ifndef LP_SCREEN_H #define LP_SCREEN_H -#include <llvm-c/Core.h> -#include <llvm-c/Analysis.h> -#include <llvm-c/Target.h> +#include "gallivm/lp_bld.h" #include <llvm-c/ExecutionEngine.h> #include "pipe/p_screen.h" #include "pipe/p_defines.h" -struct llvmpipe_winsys; +struct sw_winsys; struct llvmpipe_screen { struct pipe_screen base; - struct llvmpipe_winsys *winsys; + struct sw_winsys *winsys; LLVMModuleRef module; LLVMExecutionEngineRef engine; @@ -76,4 +74,5 @@ llvmpipe_screen( struct pipe_screen *pipe ) } + #endif /* LP_SCREEN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b0713c3b71..6be13c60a5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -37,26 +37,27 
@@ #include "util/u_memory.h" #include "util/u_pack_color.h" #include "util/u_surface.h" +#include "lp_context.h" #include "lp_scene.h" #include "lp_scene_queue.h" -#include "lp_buffer.h" #include "lp_texture.h" #include "lp_debug.h" #include "lp_fence.h" #include "lp_rast.h" #include "lp_setup_context.h" #include "lp_screen.h" -#include "lp_winsys.h" +#include "lp_state.h" +#include "state_tracker/sw_winsys.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" -static void set_scene_state( struct setup_context *, unsigned ); +static void set_scene_state( struct lp_setup_context *, enum setup_state ); struct lp_scene * -lp_setup_get_current_scene(struct setup_context *setup) +lp_setup_get_current_scene(struct lp_setup_context *setup) { if (!setup->scene) { @@ -74,7 +75,7 @@ lp_setup_get_current_scene(struct setup_context *setup) static void -first_triangle( struct setup_context *setup, +first_triangle( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4]) @@ -85,7 +86,7 @@ first_triangle( struct setup_context *setup, } static void -first_line( struct setup_context *setup, +first_line( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { @@ -95,7 +96,7 @@ first_line( struct setup_context *setup, } static void -first_point( struct setup_context *setup, +first_point( struct lp_setup_context *setup, const float (*v0)[4]) { set_scene_state( setup, SETUP_ACTIVE ); @@ -103,7 +104,7 @@ first_point( struct setup_context *setup, setup->point( setup, v0 ); } -static void reset_context( struct setup_context *setup ) +static void reset_context( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -131,7 +132,7 @@ static void reset_context( struct setup_context *setup ) /** Rasterize all scene's bins */ static void -lp_setup_rasterize_scene( struct setup_context *setup, +lp_setup_rasterize_scene( struct lp_setup_context *setup, boolean write_depth ) { struct lp_scene *scene = 
lp_setup_get_current_scene(setup); @@ -148,7 +149,7 @@ lp_setup_rasterize_scene( struct setup_context *setup, static void -begin_binning( struct setup_context *setup ) +begin_binning( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); @@ -157,21 +158,21 @@ begin_binning( struct setup_context *setup ) (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) ? "clear": "load"); if (setup->fb.nr_cbufs) { - if (setup->clear.flags & PIPE_CLEAR_COLOR) + if (setup->clear.flags & PIPE_CLEAR_COLOR) { lp_scene_bin_everywhere( scene, lp_rast_clear_color, setup->clear.color ); - else - lp_scene_bin_everywhere( scene, - lp_rast_load_color, - lp_rast_arg_null() ); + scene->has_color_clear = TRUE; + } } if (setup->fb.zsbuf) { - if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, setup->clear.zstencil ); + scene->has_depth_clear = TRUE; + } } LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); @@ -184,7 +185,7 @@ begin_binning( struct setup_context *setup ) * TODO: fast path for fullscreen clears and no triangles. 
*/ static void -execute_clears( struct setup_context *setup ) +execute_clears( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -194,8 +195,8 @@ execute_clears( struct setup_context *setup ) static void -set_scene_state( struct setup_context *setup, - unsigned new_state ) +set_scene_state( struct lp_setup_context *setup, + enum setup_state new_state ) { unsigned old_state = setup->state; @@ -222,24 +223,45 @@ set_scene_state( struct setup_context *setup, else lp_setup_rasterize_scene( setup, TRUE ); break; + + default: + assert(0 && "invalid setup state mode"); } setup->state = new_state; } +/** + * \param flags bitmask of PIPE_FLUSH_x flags + */ void -lp_setup_flush( struct setup_context *setup, +lp_setup_flush( struct lp_setup_context *setup, unsigned flags ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + if (setup->scene) { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + union lp_rast_cmd_arg dummy; + + if (flags & (PIPE_FLUSH_SWAPBUFFERS | + PIPE_FLUSH_FRAME)) { + /* Store colors in the linear color buffer(s). + * If we don't do this here, we'll end up converting the tiled + * data to linear in the texture_unmap() function, which will + * not be a parallel/threaded operation as here. + */ + lp_scene_bin_everywhere(scene, lp_rast_store_color, dummy); + } + } + set_scene_state( setup, SETUP_FLUSHED ); } void -lp_setup_bind_framebuffer( struct setup_context *setup, +lp_setup_bind_framebuffer( struct lp_setup_context *setup, const struct pipe_framebuffer_state *fb ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -256,7 +278,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, void -lp_setup_clear( struct setup_context *setup, +lp_setup_clear( struct lp_setup_context *setup, const float *color, double depth, unsigned stencil, @@ -287,15 +309,20 @@ lp_setup_clear( struct setup_context *setup, * binned scene and start again, but I don't see that as being * a common usage. 
*/ - if (flags & PIPE_CLEAR_COLOR) + if (flags & PIPE_CLEAR_COLOR) { lp_scene_bin_everywhere( scene, lp_rast_clear_color, setup->clear.color ); + scene->has_color_clear = TRUE; + } - if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, setup->clear.zstencil ); + scene->has_depth_clear = TRUE; + } + } else { /* Put ourselves into the 'pre-clear' state, specifically to try @@ -314,7 +341,7 @@ lp_setup_clear( struct setup_context *setup, * Emit a fence. */ struct pipe_fence_handle * -lp_setup_fence( struct setup_context *setup ) +lp_setup_fence( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ @@ -334,10 +361,11 @@ lp_setup_fence( struct setup_context *setup ) void -lp_setup_set_triangle_state( struct setup_context *setup, +lp_setup_set_triangle_state( struct lp_setup_context *setup, unsigned cull_mode, boolean ccw_is_frontface, - boolean scissor ) + boolean scissor, + boolean gl_rasterization_rules) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -345,12 +373,13 @@ lp_setup_set_triangle_state( struct setup_context *setup, setup->cullmode = cull_mode; setup->triangle = first_triangle; setup->scissor_test = scissor; + setup->pixel_offset = gl_rasterization_rules ? 
0.5f : 0.0f; } void -lp_setup_set_fs_inputs( struct setup_context *setup, +lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *input, unsigned nr ) { @@ -361,7 +390,7 @@ lp_setup_set_fs_inputs( struct setup_context *setup, } void -lp_setup_set_fs_functions( struct setup_context *setup, +lp_setup_set_fs_functions( struct lp_setup_context *setup, lp_jit_frag_func jit_function0, lp_jit_frag_func jit_function1, boolean opaque ) @@ -376,19 +405,19 @@ lp_setup_set_fs_functions( struct setup_context *setup, } void -lp_setup_set_fs_constants(struct setup_context *setup, - struct pipe_buffer *buffer) +lp_setup_set_fs_constants(struct lp_setup_context *setup, + struct pipe_resource *buffer) { LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffer); - pipe_buffer_reference(&setup->constants.current, buffer); + pipe_resource_reference(&setup->constants.current, buffer); setup->dirty |= LP_SETUP_NEW_CONSTANTS; } void -lp_setup_set_alpha_ref_value( struct setup_context *setup, +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup, float alpha_ref_value ) { LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value); @@ -400,7 +429,21 @@ lp_setup_set_alpha_ref_value( struct setup_context *setup, } void -lp_setup_set_blend_color( struct setup_context *setup, +lp_setup_set_stencil_ref_values( struct lp_setup_context *setup, + const ubyte refs[2] ) +{ + LP_DBG(DEBUG_SETUP, "%s %d %d\n", __FUNCTION__, refs[0], refs[1]); + + if (setup->fs.current.jit_context.stencil_ref_front != refs[0] || + setup->fs.current.jit_context.stencil_ref_back != refs[1]) { + setup->fs.current.jit_context.stencil_ref_front = refs[0]; + setup->fs.current.jit_context.stencil_ref_back = refs[1]; + setup->dirty |= LP_SETUP_NEW_FS; + } +} + +void +lp_setup_set_blend_color( struct lp_setup_context *setup, const struct pipe_blend_color *blend_color ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -415,7 +458,7 @@ lp_setup_set_blend_color( struct setup_context 
*setup, void -lp_setup_set_scissor( struct setup_context *setup, +lp_setup_set_scissor( struct lp_setup_context *setup, const struct pipe_scissor_state *scissor ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -430,7 +473,7 @@ lp_setup_set_scissor( struct setup_context *setup, void -lp_setup_set_flatshade_first( struct setup_context *setup, +lp_setup_set_flatshade_first( struct lp_setup_context *setup, boolean flatshade_first ) { setup->flatshade_first = flatshade_first; @@ -438,7 +481,7 @@ lp_setup_set_flatshade_first( struct setup_context *setup, void -lp_setup_set_vertex_info( struct setup_context *setup, +lp_setup_set_vertex_info( struct lp_setup_context *setup, struct vertex_info *vertex_info ) { /* XXX: just silently holding onto the pointer: @@ -448,11 +491,12 @@ lp_setup_set_vertex_info( struct setup_context *setup, /** - * Called during state validation when LP_NEW_TEXTURE is set. + * Called during state validation when LP_NEW_SAMPLER_VIEW is set. */ void -lp_setup_set_sampler_textures( struct setup_context *setup, - unsigned num, struct pipe_texture **texture) +lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, + unsigned num, + struct pipe_sampler_view **views) { unsigned i; @@ -461,36 +505,47 @@ lp_setup_set_sampler_textures( struct setup_context *setup, assert(num <= PIPE_MAX_SAMPLERS); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - struct pipe_texture *tex = i < num ? texture[i] : NULL; + struct pipe_sampler_view *view = i < num ? 
views[i] : NULL; - if(tex) { - struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + if(view) { + struct pipe_resource *tex = view->texture; + struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex); struct lp_jit_texture *jit_tex; jit_tex = &setup->fs.current.jit_context.textures[i]; jit_tex->width = tex->width0; jit_tex->height = tex->height0; jit_tex->depth = tex->depth0; jit_tex->last_level = tex->last_level; - jit_tex->stride = lp_tex->stride[0]; - if(!lp_tex->dt) { - jit_tex->data = lp_tex->data; + + /* We're referencing the texture's internal data, so save a + * reference to it. + */ + pipe_resource_reference(&setup->fs.current_tex[i], tex); + + if (!lp_tex->dt) { + /* regular texture - setup array of mipmap level pointers */ + int j; + for (j = 0; j <= tex->last_level; j++) { + jit_tex->data[j] = + llvmpipe_get_texture_image_all(lp_tex, j, LP_TEX_USAGE_READ, + LP_TEX_LAYOUT_LINEAR); + jit_tex->row_stride[j] = lp_tex->row_stride[j]; + jit_tex->img_stride[j] = lp_tex->img_stride[j]; + } } else { + /* display target texture/surface */ /* * XXX: Where should this be unmapped? */ struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen); - struct llvmpipe_winsys *winsys = screen->winsys; - jit_tex->data = winsys->displaytarget_map(winsys, lp_tex->dt, - PIPE_BUFFER_USAGE_CPU_READ); - assert(jit_tex->data); - } - - /* the scene references this texture */ - { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - lp_scene_texture_reference(scene, tex); + struct sw_winsys *winsys = screen->winsys; + jit_tex->data[0] = winsys->displaytarget_map(winsys, lp_tex->dt, + PIPE_TRANSFER_READ); + jit_tex->row_stride[0] = lp_tex->row_stride[0]; + jit_tex->img_stride[0] = lp_tex->img_stride[0]; + assert(jit_tex->data[0]); } } } @@ -505,8 +560,8 @@ lp_setup_set_sampler_textures( struct setup_context *setup, * being rendered and the current scene being built. 
*/ unsigned -lp_setup_is_texture_referenced( const struct setup_context *setup, - const struct pipe_texture *texture ) +lp_setup_is_resource_referenced( const struct lp_setup_context *setup, + const struct pipe_resource *texture ) { unsigned i; @@ -521,7 +576,7 @@ lp_setup_is_texture_referenced( const struct setup_context *setup, /* check textures referenced by the scene */ for (i = 0; i < Elements(setup->scenes); i++) { - if (lp_scene_is_texture_referenced(setup->scenes[i], texture)) { + if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) { return PIPE_REFERENCED_FOR_READ; } } @@ -534,7 +589,7 @@ lp_setup_is_texture_referenced( const struct setup_context *setup, * Called by vbuf code when we're about to draw something. */ void -lp_setup_update_state( struct setup_context *setup ) +lp_setup_update_state( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); @@ -542,6 +597,20 @@ lp_setup_update_state( struct setup_context *setup ) assert(setup->fs.current.jit_function); + /* Some of the 'draw' pipeline stages may have changed some driver state. + * Make sure we've processed those state changes before anything else. + * + * XXX this is the only place where llvmpipe_context is used in the + * setup code. This may get refactored/changed... 
+ */ + { + struct llvmpipe_context *lp = llvmpipe_context(scene->pipe); + if (lp->dirty) { + llvmpipe_update_derived(lp); + } + assert(lp->dirty == 0); + } + if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) { uint8_t *stored; unsigned i, j; @@ -582,11 +651,11 @@ lp_setup_update_state( struct setup_context *setup ) } if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { - struct pipe_buffer *buffer = setup->constants.current; + struct pipe_resource *buffer = setup->constants.current; if(buffer) { - unsigned current_size = buffer->size; - const void *current_data = llvmpipe_buffer(buffer)->data; + unsigned current_size = buffer->width0; + const void *current_data = llvmpipe_resource_data(buffer); /* TODO: copy only the actually used constants? */ @@ -626,6 +695,7 @@ lp_setup_update_state( struct setup_context *setup ) * the new, current state. So allocate a new lp_rast_state object * and append it to the bin's setup data buffer. */ + uint i; struct lp_rast_state *stored = (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored); if(stored) { @@ -639,6 +709,14 @@ lp_setup_update_state( struct setup_context *setup ) lp_rast_set_state, lp_rast_arg_state(setup->fs.stored) ); } + + /* The scene now references the textures in the rasterization + * state record. Note that now. 
+ */ + for (i = 0; i < Elements(setup->fs.current_tex); i++) { + if (setup->fs.current_tex[i]) + lp_scene_texture_reference(scene, setup->fs.current_tex[i]); + } } } @@ -652,11 +730,17 @@ lp_setup_update_state( struct setup_context *setup ) /* Only caller is lp_setup_vbuf_destroy() */ void -lp_setup_destroy( struct setup_context *setup ) +lp_setup_destroy( struct lp_setup_context *setup ) { + uint i; + reset_context( setup ); - pipe_buffer_reference(&setup->constants.current, NULL); + for (i = 0; i < Elements(setup->fs.current_tex); i++) { + pipe_resource_reference(&setup->fs.current_tex[i], NULL); + } + + pipe_resource_reference(&setup->constants.current, NULL); /* free the scenes in the 'empty' queue */ while (1) { @@ -677,12 +761,12 @@ lp_setup_destroy( struct setup_context *setup ) * the draw module. Currently also creates a rasterizer to use with * it. */ -struct setup_context * +struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ) { unsigned i; - struct setup_context *setup = CALLOC_STRUCT(setup_context); + struct lp_setup_context *setup = CALLOC_STRUCT(lp_setup_context); if (!setup) return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 17c112b528..e10d37d8d0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -52,87 +52,92 @@ struct lp_shader_input { unsigned src_index; /* where to find values in incoming vertices */ }; -struct pipe_texture; +struct pipe_resource; struct pipe_surface; -struct pipe_buffer; struct pipe_blend_color; struct pipe_screen; struct pipe_framebuffer_state; struct lp_fragment_shader; struct lp_jit_context; -struct setup_context * +struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ); void -lp_setup_clear(struct setup_context *setup, +lp_setup_clear(struct lp_setup_context *setup, const float *clear_color, double clear_depth, unsigned 
clear_stencil, unsigned flags); struct pipe_fence_handle * -lp_setup_fence( struct setup_context *setup ); +lp_setup_fence( struct lp_setup_context *setup ); void -lp_setup_flush( struct setup_context *setup, +lp_setup_flush( struct lp_setup_context *setup, unsigned flags ); void -lp_setup_bind_framebuffer( struct setup_context *setup, +lp_setup_bind_framebuffer( struct lp_setup_context *setup, const struct pipe_framebuffer_state *fb ); void -lp_setup_set_triangle_state( struct setup_context *setup, +lp_setup_set_triangle_state( struct lp_setup_context *setup, unsigned cullmode, boolean front_is_ccw, - boolean scissor ); + boolean scissor, + boolean gl_rasterization_rules ); void -lp_setup_set_fs_inputs( struct setup_context *setup, +lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *interp, unsigned nr ); void -lp_setup_set_fs_functions( struct setup_context *setup, +lp_setup_set_fs_functions( struct lp_setup_context *setup, lp_jit_frag_func jit_function0, lp_jit_frag_func jit_function1, boolean opaque ); void -lp_setup_set_fs_constants(struct setup_context *setup, - struct pipe_buffer *buffer); +lp_setup_set_fs_constants(struct lp_setup_context *setup, + struct pipe_resource *buffer); void -lp_setup_set_alpha_ref_value( struct setup_context *setup, +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup, float alpha_ref_value ); void -lp_setup_set_blend_color( struct setup_context *setup, +lp_setup_set_stencil_ref_values( struct lp_setup_context *setup, + const ubyte refs[2] ); + +void +lp_setup_set_blend_color( struct lp_setup_context *setup, const struct pipe_blend_color *blend_color ); void -lp_setup_set_scissor( struct setup_context *setup, +lp_setup_set_scissor( struct lp_setup_context *setup, const struct pipe_scissor_state *scissor ); void -lp_setup_set_sampler_textures( struct setup_context *setup, - unsigned num, struct pipe_texture **texture); +lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, + 
unsigned num, + struct pipe_sampler_view **views); unsigned -lp_setup_is_texture_referenced( const struct setup_context *setup, - const struct pipe_texture *texture ); +lp_setup_is_resource_referenced( const struct lp_setup_context *setup, + const struct pipe_resource *texture ); void -lp_setup_set_flatshade_first( struct setup_context *setup, +lp_setup_set_flatshade_first( struct lp_setup_context *setup, boolean flatshade_first ); void -lp_setup_set_vertex_info( struct setup_context *setup, +lp_setup_set_vertex_info( struct lp_setup_context *setup, struct vertex_info *info ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a5fc34e54a..4594f7597d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -65,7 +65,7 @@ struct lp_scene_queue; * Subclass of vbuf_render, plugged directly into the draw module as * the rendering backend. */ -struct setup_context +struct lp_setup_context { struct vbuf_render base; @@ -89,6 +89,7 @@ struct setup_context boolean ccw_is_frontface; boolean scissor_test; unsigned cullmode; + float pixel_offset; struct pipe_framebuffer_state fb; @@ -98,7 +99,7 @@ struct setup_context union lp_rast_cmd_arg zstencil; /**< lp_rast_clear_zstencil() cmd */ } clear; - enum { + enum setup_state { SETUP_FLUSHED, SETUP_CLEARED, SETUP_ACTIVE @@ -110,11 +111,12 @@ struct setup_context const struct lp_rast_state *stored; /**< what's in the scene */ struct lp_rast_state current; /**< currently set state */ + struct pipe_resource *current_tex[PIPE_MAX_SAMPLERS]; } fs; /** fragment shader constants */ struct { - struct pipe_buffer *current; + struct pipe_resource *current; unsigned stored_size; const void *stored_data; } constants; @@ -131,29 +133,29 @@ struct setup_context unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ - void (*point)( struct setup_context *, + void (*point)( struct lp_setup_context *, const float 
(*v0)[4]); - void (*line)( struct setup_context *, + void (*line)( struct lp_setup_context *, const float (*v0)[4], const float (*v1)[4]); - void (*triangle)( struct setup_context *, + void (*triangle)( struct lp_setup_context *, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4]); }; -void lp_setup_choose_triangle( struct setup_context *setup ); -void lp_setup_choose_line( struct setup_context *setup ); -void lp_setup_choose_point( struct setup_context *setup ); +void lp_setup_choose_triangle( struct lp_setup_context *setup ); +void lp_setup_choose_line( struct lp_setup_context *setup ); +void lp_setup_choose_point( struct lp_setup_context *setup ); -struct lp_scene *lp_setup_get_current_scene(struct setup_context *setup); +struct lp_scene *lp_setup_get_current_scene(struct lp_setup_context *setup); -void lp_setup_init_vbuf(struct setup_context *setup); +void lp_setup_init_vbuf(struct lp_setup_context *setup); -void lp_setup_update_state( struct setup_context *setup ); +void lp_setup_update_state( struct lp_setup_context *setup ); -void lp_setup_destroy( struct setup_context *setup ); +void lp_setup_destroy( struct lp_setup_context *setup ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index feea79d394..be41c44e6f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -31,7 +31,7 @@ #include "lp_setup_context.h" -static void line_nop( struct setup_context *setup, +static void line_nop( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4] ) { @@ -39,7 +39,7 @@ static void line_nop( struct setup_context *setup, void -lp_setup_choose_line( struct setup_context *setup ) +lp_setup_choose_line( struct lp_setup_context *setup ) { setup->line = line_nop; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index f03ca729b2..9f69e6c5ce 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -31,14 +31,14 @@ #include "lp_setup_context.h" -static void point_nop( struct setup_context *setup, +static void point_nop( struct lp_setup_context *setup, const float (*v0)[4] ) { } void -lp_setup_choose_point( struct setup_context *setup ) +lp_setup_choose_point( struct lp_setup_context *setup ) { setup->point = point_nop; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index e75412ac9a..f8a5816573 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -41,7 +41,8 @@ /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct lp_rast_triangle *tri, +static void constant_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, unsigned slot, const float value, unsigned i ) @@ -56,7 +57,8 @@ static void constant_coef( struct lp_rast_triangle *tri, * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. */ -static void linear_coef( struct lp_rast_triangle *tri, +static void linear_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, float oneoverarea, unsigned slot, const float (*v1)[4], @@ -90,8 +92,8 @@ static void linear_coef( struct lp_rast_triangle *tri, * instead - i'll switch to this later. */ tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); } @@ -103,7 +105,8 @@ static void linear_coef( struct lp_rast_triangle *tri, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. 
*/ -static void perspective_coef( struct lp_rast_triangle *tri, +static void perspective_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, float oneoverarea, unsigned slot, const float (*v1)[4], @@ -125,8 +128,8 @@ static void perspective_coef( struct lp_rast_triangle *tri, tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); } @@ -137,7 +140,8 @@ static void perspective_coef( struct lp_rast_triangle *tri, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coef(struct lp_rast_triangle *tri, +setup_fragcoord_coef(struct lp_setup_context *setup, + struct lp_rast_triangle *tri, float oneoverarea, unsigned slot, const float (*v1)[4], @@ -153,27 +157,33 @@ setup_fragcoord_coef(struct lp_rast_triangle *tri, tri->inputs.dadx[slot][1] = 0.0; tri->inputs.dady[slot][1] = 1.0; /*Z*/ - linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2); + linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2); /*W*/ - linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3); + linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3); } -static void setup_facing_coef( struct lp_rast_triangle *tri, +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? 
+ */ +static void setup_facing_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, unsigned slot, boolean frontface ) { - constant_coef( tri, slot, 1.0f - frontface, 0 ); - constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ - constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ - constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ + /* convert TRUE to 1.0 and FALSE to -1.0 */ + constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 ); + constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */ } /** * Compute the tri->coef[] array dadx, dady, a0 values. */ -static void setup_tri_coefficients( struct setup_context *setup, +static void setup_tri_coefficients( struct lp_setup_context *setup, struct lp_rast_triangle *tri, float oneoverarea, const float (*v1)[4], @@ -185,7 +195,7 @@ static void setup_tri_coefficients( struct setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); + setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3); /* setup interpolation for all the remaining attributes: */ @@ -196,27 +206,27 @@ static void setup_tri_coefficients( struct setup_context *setup, switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(tri, slot+1, v3[vert_attr][i], i); + constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_POSITION: /* XXX: fix me - 
duplicates the values in slot zero. */ - setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3); + setup_fragcoord_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3); break; case LP_INTERP_FACING: - setup_facing_coef(tri, slot+1, frontface); + setup_facing_coef(setup, tri, slot+1, frontface); break; default: @@ -267,6 +277,32 @@ alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) } +/** + * Print triangle vertex attribs (for debug). + */ +static void +print_triangle(struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4]) +{ + uint i; + + debug_printf("llvmpipe triangle\n"); + for (i = 0; i < setup->fs.nr_inputs; i++) { + debug_printf(" v1[%d]: %f %f %f %f\n", i, + v1[i][0], v1[i][1], v1[i][2], v1[i][3]); + } + for (i = 0; i < setup->fs.nr_inputs; i++) { + debug_printf(" v2[%d]: %f %f %f %f\n", i, + v2[i][0], v2[i][1], v2[i][2], v2[i][3]); + } + for (i = 0; i < setup->fs.nr_inputs; i++) { + debug_printf(" v3[%d]: %f %f %f %f\n", i, + v3[i][0], v3[i][1], v3[i][2], v3[i][3]); + } +} + /** * Do basic setup for triangle rasterization and determine which @@ -274,19 +310,19 @@ alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) * bins for the tiles which we overlap. 
*/ static void -do_triangle_ccw(struct setup_context *setup, +do_triangle_ccw(struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { /* x/y positions in fixed point */ - const int x1 = subpixel_snap(v1[0][0]); - const int x2 = subpixel_snap(v2[0][0]); - const int x3 = subpixel_snap(v3[0][0]); - const int y1 = subpixel_snap(v1[0][1]); - const int y2 = subpixel_snap(v2[0][1]); - const int y3 = subpixel_snap(v3[0][1]); + const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset); + const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset); + const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset); + const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset); + const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset); + const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset); struct lp_scene *scene = lp_setup_get_current_scene(setup); struct lp_rast_triangle *tri; @@ -295,6 +331,9 @@ do_triangle_ccw(struct setup_context *setup, int minx, maxx, miny, maxy; unsigned tri_bytes; + if (0) + print_triangle(setup, v1, v2, v3); + tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); #ifdef DEBUG @@ -356,6 +395,8 @@ do_triangle_ccw(struct setup_context *setup, */ setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing ); + tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + /* half-edge constants, will be interated over the whole render target. */ tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; @@ -565,7 +606,10 @@ do_triangle_ccw(struct setup_context *setup, } -static void triangle_cw( struct setup_context *setup, +/** + * Draw triangle if it's CW, cull otherwise. 
+ */ +static void triangle_cw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -574,7 +618,10 @@ static void triangle_cw( struct setup_context *setup, } -static void triangle_ccw( struct setup_context *setup, +/** + * Draw triangle if it's CCW, cull otherwise. + */ +static void triangle_ccw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -583,7 +630,11 @@ static void triangle_ccw( struct setup_context *setup, } -static void triangle_both( struct setup_context *setup, + +/** + * Draw triangle whether it's CW or CCW. + */ +static void triangle_both( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -602,7 +653,7 @@ static void triangle_both( struct setup_context *setup, } -static void triangle_nop( struct setup_context *setup, +static void triangle_nop( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -611,7 +662,7 @@ static void triangle_nop( struct setup_context *setup, void -lp_setup_choose_triangle( struct setup_context *setup ) +lp_setup_choose_triangle( struct lp_setup_context *setup ) { switch (setup->cullmode) { case PIPE_WINDING_NONE: diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 24291da91e..a401275478 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -48,10 +48,10 @@ /** cast wrapper */ -static struct setup_context * -setup_context(struct vbuf_render *vbr) +static struct lp_setup_context * +lp_setup_context(struct vbuf_render *vbr) { - return (struct setup_context *) vbr; + return (struct lp_setup_context *) vbr; } @@ -59,7 +59,11 @@ setup_context(struct vbuf_render *vbr) static const struct vertex_info * lp_setup_get_vertex_info(struct vbuf_render *vbr) { - struct setup_context *setup = setup_context(vbr); + struct 
lp_setup_context *setup = lp_setup_context(vbr); + + /* vertex size/info depends on the latest state */ + lp_setup_update_state(setup); + return setup->vertex_info; } @@ -68,7 +72,7 @@ static boolean lp_setup_allocate_vertices(struct vbuf_render *vbr, ushort vertex_size, ushort nr_vertices) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); unsigned size = vertex_size * nr_vertices; if (setup->vertex_buffer_size < size) { @@ -92,7 +96,7 @@ lp_setup_release_vertices(struct vbuf_render *vbr) static void * lp_setup_map_vertices(struct vbuf_render *vbr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); return setup->vertex_buffer; } @@ -101,7 +105,7 @@ lp_setup_unmap_vertices(struct vbuf_render *vbr, ushort min_index, ushort max_index ) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size ); /* do nothing */ } @@ -110,7 +114,7 @@ lp_setup_unmap_vertices(struct vbuf_render *vbr, static boolean lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim) { - setup_context(vbr)->prim = prim; + lp_setup_context(vbr)->prim = prim; return TRUE; } @@ -129,7 +133,7 @@ static INLINE const_float4_ptr get_vert( const void *vertex_buffer, static void lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); const void *vertex_buffer = setup->vertex_buffer; unsigned i; @@ -231,57 +235,29 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_QUADS: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - 
get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_QUAD_STRIP: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride)); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, 
indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); } break; @@ -312,7 +288,7 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) static void lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); const void *vertex_buffer = (void *) get_vert(setup->vertex_buffer, start, stride); @@ -415,57 +391,28 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_QUADS: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; case PIPE_PRIM_QUAD_STRIP: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 2) { - 
setup->triangle( setup, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - setup->triangle( setup, - - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-0, stride) ); - } + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; @@ -493,7 +440,7 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void lp_setup_vbuf_destroy(struct vbuf_render *vbr) { - lp_setup_destroy(setup_context(vbr)); + lp_setup_destroy(lp_setup_context(vbr)); } @@ -501,7 +448,7 @@ lp_setup_vbuf_destroy(struct vbuf_render *vbr) * Create the post-transform vertex handler for the given context. 
*/ void -lp_setup_init_vbuf(struct setup_context *setup) +lp_setup_init_vbuf(struct lp_setup_context *setup) { setup->base.max_indices = LP_MAX_VBUF_INDEXES; setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 9beba32271..dcbff190b6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -31,7 +31,7 @@ #ifndef LP_STATE_H #define LP_STATE_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" @@ -50,7 +50,7 @@ #define LP_NEW_DEPTH_STENCIL_ALPHA 0x100 #define LP_NEW_CONSTANTS 0x200 #define LP_NEW_SAMPLER 0x400 -#define LP_NEW_TEXTURE 0x800 +#define LP_NEW_SAMPLER_VIEW 0x800 #define LP_NEW_VERTEX 0x1000 #define LP_NEW_VS 0x2000 #define LP_NEW_QUERY 0x4000 @@ -67,6 +67,7 @@ struct lp_fragment_shader; struct lp_fragment_shader_variant_key { struct pipe_depth_state depth; + struct pipe_stencil_state stencil[2]; struct pipe_alpha_state alpha; struct pipe_blend_state blend; enum pipe_format zsbuf_format; @@ -119,6 +120,10 @@ struct lp_vertex_shader { struct draw_vertex_shader *draw_data; }; +struct lp_velems_state { + unsigned count; + struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; +}; void * @@ -165,7 +170,7 @@ void llvmpipe_set_clip_state( struct pipe_context *, void llvmpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - struct pipe_buffer *buf); + struct pipe_resource *buf); void *llvmpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); @@ -176,28 +181,39 @@ void *llvmpipe_create_vs_state(struct pipe_context *, void llvmpipe_bind_vs_state(struct pipe_context *, void *); void llvmpipe_delete_vs_state(struct pipe_context *, void *); +void *llvmpipe_create_vertex_elements_state(struct pipe_context *, + unsigned count, + const struct pipe_vertex_element *); +void llvmpipe_bind_vertex_elements_state(struct 
pipe_context *, void *); +void llvmpipe_delete_vertex_elements_state(struct pipe_context *, void *); + void llvmpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); + const struct pipe_poly_stipple * ); void llvmpipe_set_scissor_state( struct pipe_context *, const struct pipe_scissor_state * ); -void llvmpipe_set_sampler_textures( struct pipe_context *, - unsigned num, - struct pipe_texture ** ); +void llvmpipe_set_fragment_sampler_views(struct pipe_context *, + unsigned num, + struct pipe_sampler_view **); + +void +llvmpipe_set_vertex_sampler_views(struct pipe_context *, + unsigned num, + struct pipe_sampler_view **); + +struct pipe_sampler_view * +llvmpipe_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ); void -llvmpipe_set_vertex_sampler_textures(struct pipe_context *, - unsigned num_textures, - struct pipe_texture **); +llvmpipe_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view); void llvmpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); -void llvmpipe_set_vertex_elements(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); - void llvmpipe_set_vertex_buffers(struct pipe_context *, unsigned count, const struct pipe_vertex_buffer *); @@ -211,13 +227,13 @@ void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void llvmpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, + struct pipe_resource *indexBuffer, + unsigned indexSize, int indexBias, unsigned mode, unsigned start, unsigned count); void llvmpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, + struct pipe_resource *indexBuffer, + unsigned indexSize, int indexBias, unsigned min_index, unsigned max_index, unsigned mode, unsigned start, unsigned count); diff 
--git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index bdd906e1a7..113d77ab78 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -50,88 +50,87 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) { const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo = &llvmpipe->vertex_info; - const uint num = draw_num_shader_outputs(llvmpipe->draw); + struct lp_shader_input inputs[1 + PIPE_MAX_SHADER_INPUTS]; + unsigned vs_index; uint i; - /* Tell setup to tell the draw module to simply emit the whole - * post-xform vertex as-is. - * - * Not really sure if this is the best approach. + /* + * Match FS inputs against VS outputs, emitting the necessary attributes. */ - vinfo->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo); - - - lp_setup_set_vertex_info(llvmpipe->setup, vinfo); -/* - llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, - TGSI_SEMANTIC_PSIZE, 0); -*/ - - /* Now match FS inputs against emitted vertex data. It's also - * entirely possible to just have a fixed layout for FS input, - * determined by the fragment shader itself, and adjust the draw - * outputs to match that. 
- */ - { - struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; - - for (i = 0; i < lpfs->info.num_inputs; i++) { + vinfo->num_attribs = 0; - /* This can be precomputed, except for flatshade: + vs_index = draw_find_shader_output(llvmpipe->draw, + TGSI_SEMANTIC_POSITION, + 0); + + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); + + for (i = 0; i < lpfs->info.num_inputs; i++) { + /* + * Search for each input in current vs output: + */ + + vs_index = draw_find_shader_output(llvmpipe->draw, + lpfs->info.input_semantic_name[i], + lpfs->info.input_semantic_index[i]); + + /* This can be pre-computed, except for flatshade: + */ + switch (lpfs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_FACE: + inputs[i].interp = LP_INTERP_FACING; + break; + case TGSI_SEMANTIC_POSITION: + /* Position was already emitted above + */ + inputs[i].interp = LP_INTERP_POSITION; + inputs[i].src_index = 0; + continue; + case TGSI_SEMANTIC_COLOR: + /* Colors are linearly inputs[i].interpolated in the fragment shader + * even when flatshading is active. This just tells the + * setup module to use coefficients with ddx==0 and + * ddy==0. */ - switch (lpfs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_FACE: - inputs[i].interp = LP_INTERP_FACING; + if (llvmpipe->rasterizer->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_LINEAR; + break; + + default: + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + inputs[i].interp = LP_INTERP_CONSTANT; break; - case TGSI_SEMANTIC_POSITION: - inputs[i].interp = LP_INTERP_POSITION; + case TGSI_INTERPOLATE_LINEAR: + inputs[i].interp = LP_INTERP_LINEAR; break; - case TGSI_SEMANTIC_COLOR: - /* Colors are linearly interpolated in the fragment shader - * even when flatshading is active. This just tells the - * setup module to use coefficients with ddx==0 and - * ddy==0. 
- */ - if (llvmpipe->rasterizer->flatshade) - inputs[i].interp = LP_INTERP_CONSTANT; - else - inputs[i].interp = LP_INTERP_LINEAR; + case TGSI_INTERPOLATE_PERSPECTIVE: + inputs[i].interp = LP_INTERP_PERSPECTIVE; break; - default: - switch (lpfs->info.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - inputs[i].interp = LP_INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - inputs[i].interp = LP_INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - inputs[i].interp = LP_INTERP_PERSPECTIVE; - break; - default: - assert(0); - break; - } + assert(0); + break; } - - /* Search for each input in current vs output: - */ - inputs[i].src_index = - draw_find_shader_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); } - lp_setup_set_fs_inputs(llvmpipe->setup, - inputs, - lpfs->info.num_inputs); + /* + * Emit the requested fs attribute for all but position. + */ + + inputs[i].src_index = vinfo->num_attribs; + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); } + + draw_compute_vertex_size(vinfo); + + lp_setup_set_vertex_info(llvmpipe->setup, vinfo); + + lp_setup_set_fs_inputs(llvmpipe->setup, + inputs, + lpfs->info.num_inputs); } @@ -150,7 +149,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) */ if (llvmpipe->tex_timestamp != lp_screen->timestamp) { llvmpipe->tex_timestamp = lp_screen->timestamp; - llvmpipe->dirty |= LP_NEW_TEXTURE; + llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW; } if (llvmpipe->dirty & (LP_NEW_RASTERIZER | @@ -164,7 +163,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_DEPTH_STENCIL_ALPHA | LP_NEW_RASTERIZER | LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) + LP_NEW_SAMPLER_VIEW)) llvmpipe_update_fs( llvmpipe ); if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) @@ -174,18 +173,21 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & LP_NEW_SCISSOR) lp_setup_set_scissor(llvmpipe->setup, &llvmpipe->scissor); - 
if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) + if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) { lp_setup_set_alpha_ref_value(llvmpipe->setup, llvmpipe->depth_stencil->alpha.ref_value); + lp_setup_set_stencil_ref_values(llvmpipe->setup, + llvmpipe->stencil_ref.ref_value); + } if (llvmpipe->dirty & LP_NEW_CONSTANTS) lp_setup_set_fs_constants(llvmpipe->setup, llvmpipe->constants[PIPE_SHADER_FRAGMENT]); - if (llvmpipe->dirty & LP_NEW_TEXTURE) - lp_setup_set_sampler_textures(llvmpipe->setup, - llvmpipe->num_textures, - llvmpipe->texture); + if (llvmpipe->dirty & LP_NEW_SAMPLER_VIEW) + lp_setup_set_fragment_sampler_views(llvmpipe->setup, + llvmpipe->num_fragment_sampler_views, + llvmpipe->fragment_sampler_views); llvmpipe->dirty = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c4b79dd415..2c4303a895 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -37,10 +37,10 @@ * - early depth test * - fragment shader * - alpha test - * - depth/stencil test (stencil TBI) + * - depth/stencil test * - blending * - * This file has only the glue to assembly the fragment pipeline. The actual + * This file has only the glue to assemble the fragment pipeline. The actual * plumbing of converting Gallium state into LLVM IR is done elsewhere, in the * lp_bld_*.[ch] files, and in a complete generic and reusable way. 
Here we * muster the LLVM JIT execution engine to create a function that follows an @@ -77,15 +77,15 @@ #include "gallivm/lp_bld_conv.h" #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_logic.h" -#include "gallivm/lp_bld_depth.h" -#include "gallivm/lp_bld_interp.h" #include "gallivm/lp_bld_tgsi.h" -#include "gallivm/lp_bld_alpha.h" -#include "gallivm/lp_bld_blend.h" #include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_debug.h" -#include "lp_buffer.h" + +#include "lp_bld_alpha.h" +#include "lp_bld_blend.h" +#include "lp_bld_depth.h" +#include "lp_bld_interp.h" #include "lp_context.h" #include "lp_debug.h" #include "lp_perf.h" @@ -95,6 +95,9 @@ #include "lp_tex_sample.h" +#include <llvm-c/Analysis.h> + + static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; static const unsigned char quad_offset_y[4] = {0, 0, 1, 1}; @@ -135,20 +138,22 @@ generate_pos0(LLVMBuilderRef builder, /** - * Generate the depth test. + * Generate the depth /stencil test code. 
*/ static void -generate_depth(LLVMBuilderRef builder, - const struct lp_fragment_shader_variant_key *key, - struct lp_type src_type, - struct lp_build_mask_context *mask, - LLVMValueRef src, - LLVMValueRef dst_ptr) +generate_depth_stencil(LLVMBuilderRef builder, + const struct lp_fragment_shader_variant_key *key, + struct lp_type src_type, + struct lp_build_mask_context *mask, + LLVMValueRef stencil_refs[2], + LLVMValueRef src, + LLVMValueRef dst_ptr, + LLVMValueRef facing) { const struct util_format_description *format_desc; struct lp_type dst_type; - if(!key->depth.enabled) + if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled) return; format_desc = util_format_description(key->zsbuf_format); @@ -175,19 +180,22 @@ generate_depth(LLVMBuilderRef builder, assert(dst_type.width == src_type.width); assert(dst_type.length == src_type.length); + /* Convert fragment Z from float to integer */ lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); dst_ptr = LLVMBuildBitCast(builder, dst_ptr, LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); - - lp_build_depth_test(builder, - &key->depth, - dst_type, - format_desc, - mask, - src, - dst_ptr); + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + dst_type, + format_desc, + mask, + stencil_refs, + src, + dst_ptr, + facing); } @@ -215,7 +223,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, #endif struct lp_build_flow_context *flow; struct lp_type i32_type; - LLVMTypeRef i32vec4_type, mask_type; + LLVMTypeRef i32vec4_type; LLVMValueRef c0_vec, c1_vec, c2_vec; LLVMValueRef in_out_mask; @@ -231,8 +239,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, i32vec4_type = lp_build_int32_vec4_type(); - mask_type = LLVMIntType(32 * 4); - /* * Use a conditional here to do detailed pixel in/out testing. * We only have to do this if c0 != INT_MIN. 
@@ -249,7 +255,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMConstInt(LLVMInt32Type(), INT_MIN, 0), ""); - in_out_mask = lp_build_int_const_scalar(i32_type, ~0); + in_out_mask = lp_build_const_int_vec(i32_type, ~0); lp_build_flow_scope_declare(flow, &in_out_mask); @@ -364,7 +370,7 @@ build_int32_vec_const(int value) i32_type.norm = FALSE; /* values are not normalized */ i32_type.width = 32; /* 32-bit int values */ i32_type.length = 4; /* 4 elements per vector */ - return lp_build_int_const_scalar(i32_type, value); + return lp_build_const_int_vec(i32_type, value); } @@ -387,6 +393,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef *pmask, LLVMValueRef (*color)[4], LLVMValueRef depth_ptr, + LLVMValueRef facing, unsigned do_tri_test, LLVMValueRef c0, LLVMValueRef c1, @@ -402,15 +409,19 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z = interp->pos[2]; + LLVMValueRef stencil_refs[2]; struct lp_build_flow_context *flow; struct lp_build_mask_context mask; - boolean early_depth_test; + boolean early_depth_stencil_test; unsigned attrib; unsigned chan; unsigned cbuf; assert(i < 4); + stencil_refs[0] = lp_jit_context_stencil_ref_front_value(builder, context_ptr); + stencil_refs[1] = lp_jit_context_stencil_ref_back_value(builder, context_ptr); + elem_type = lp_build_elem_type(type); vec_type = lp_build_vec_type(type); int_vec_type = lp_build_int_vec_type(type); @@ -450,20 +461,20 @@ generate_fs(struct llvmpipe_context *lp, lp_build_mask_update(&mask, smask); } - early_depth_test = - key->depth.enabled && + early_depth_stencil_test = + (key->depth.enabled || key->stencil[0].enabled) && !key->alpha.enabled && !shader->info.uses_kill && !shader->info.writes_z; - if(early_depth_test) - generate_depth(builder, key, - type, &mask, - z, depth_ptr); + if (early_depth_stencil_test) + generate_depth_stencil(builder, key, + type, &mask, + stencil_refs, z, depth_ptr, facing); 
lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, sampler); + outputs, sampler, &shader->info); for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -503,10 +514,10 @@ generate_fs(struct llvmpipe_context *lp, } } - if(!early_depth_test) - generate_depth(builder, key, - type, &mask, - z, depth_ptr); + if (!early_depth_stencil_test) + generate_depth_stencil(builder, key, + type, &mask, + stencil_refs, z, depth_ptr, facing); lp_build_mask_end(&mask); @@ -582,6 +593,20 @@ generate_blend(const struct pipe_blend_state *blend, } +/** casting function to avoid compiler warnings */ +static lp_jit_frag_func +cast_voidptr_to_lp_jit_frag_func(void *p) +{ + union { + void *v; + lp_jit_frag_func f; + } tmp; + assert(sizeof(tmp.v) == sizeof(tmp.f)); + tmp.v = p; + return tmp.f; +} + + /** * Generate the runtime callable function for the whole fragment pipeline. * Note that the function which we generate operates on a block of 16 @@ -603,7 +628,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; - LLVMTypeRef arg_types[14]; + LLVMTypeRef arg_types[15]; LLVMTypeRef func_type; LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; @@ -626,6 +651,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; LLVMValueRef function; + LLVMValueRef facing; unsigned num_fs; unsigned i; unsigned chan; @@ -665,20 +691,21 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ - arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ - arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ - arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ - arg_types[6] = 
LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ - arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ - arg_types[8] = LLVMInt32Type(); /* c0 */ - arg_types[9] = LLVMInt32Type(); /* c1 */ - arg_types[10] = LLVMInt32Type(); /* c2 */ + arg_types[3] = LLVMFloatType(); /* facing */ + arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ + arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ + arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ + arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ + arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[9] = LLVMInt32Type(); /* c0 */ + arg_types[10] = LLVMInt32Type(); /* c1 */ + arg_types[11] = LLVMInt32Type(); /* c2 */ /* Note: the step arrays are built as int32[16] but we interpret * them here as int32_vec4[4]. */ - arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ - arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ - arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ + arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ + arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ + arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); @@ -698,17 +725,18 @@ generate_fragment(struct llvmpipe_context *lp, context_ptr = LLVMGetParam(function, 0); x = LLVMGetParam(function, 1); y = LLVMGetParam(function, 2); - a0_ptr = LLVMGetParam(function, 3); - dadx_ptr = LLVMGetParam(function, 4); - dady_ptr = LLVMGetParam(function, 5); - color_ptr_ptr = LLVMGetParam(function, 6); - depth_ptr = LLVMGetParam(function, 7); - c0 = LLVMGetParam(function, 8); - c1 = LLVMGetParam(function, 9); - c2 = LLVMGetParam(function, 10); - step0_ptr = LLVMGetParam(function, 11); - step1_ptr = LLVMGetParam(function, 12); - step2_ptr = LLVMGetParam(function, 13); + facing = LLVMGetParam(function, 3); + 
a0_ptr = LLVMGetParam(function, 4); + dadx_ptr = LLVMGetParam(function, 5); + dady_ptr = LLVMGetParam(function, 6); + color_ptr_ptr = LLVMGetParam(function, 7); + depth_ptr = LLVMGetParam(function, 8); + c0 = LLVMGetParam(function, 9); + c1 = LLVMGetParam(function, 10); + c2 = LLVMGetParam(function, 11); + step0_ptr = LLVMGetParam(function, 12); + step1_ptr = LLVMGetParam(function, 13); + step2_ptr = LLVMGetParam(function, 14); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -750,7 +778,6 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; - int cbuf; if(i != 0) lp_build_interp_soa_update(&interp, i); @@ -767,6 +794,7 @@ generate_fragment(struct llvmpipe_context *lp, &fs_mask[i], /* output */ out_color, depth_ptr_i, + facing, do_tri_test, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); @@ -842,10 +870,14 @@ generate_fragment(struct llvmpipe_context *lp, /* * Translate the LLVM IR into machine code. */ - variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); + { + void *f = LLVMGetPointerToGlobal(screen->engine, function); + + variant->jit_function[do_tri_test] = cast_voidptr_to_lp_jit_frag_func(f); - if (LP_DEBUG & DEBUG_ASM) - lp_disassemble(variant->jit_function[do_tri_test]); + if (LP_DEBUG & DEBUG_ASM) + lp_disassemble(f); + } } @@ -1009,11 +1041,11 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *constants) + struct pipe_resource *constants) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - unsigned size = constants ? constants->size : 0; - const void *data = constants ? llvmpipe_buffer(constants)->data : NULL; + unsigned size = constants ? constants->width0 : 0; + const void *data = constants ? 
llvmpipe_resource_data(constants) : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); @@ -1024,7 +1056,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, draw_flush(llvmpipe->draw); /* note: reference counting */ - pipe_buffer_reference(&llvmpipe->constants[shader], constants); + pipe_resource_reference(&llvmpipe->constants[shader], constants); if(shader == PIPE_SHADER_VERTEX) { draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, 0, @@ -1051,10 +1083,15 @@ make_variant_key(struct llvmpipe_context *lp, memset(key, 0, sizeof *key); - if(lp->framebuffer.zsbuf && - lp->depth_stencil->depth.enabled) { - key->zsbuf_format = lp->framebuffer.zsbuf->format; - memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + if (lp->framebuffer.zsbuf) { + if (lp->depth_stencil->depth.enabled) { + key->zsbuf_format = lp->framebuffer.zsbuf->format; + memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + } + if (lp->depth_stencil->stencil[0].enabled) { + key->zsbuf_format = lp->framebuffer.zsbuf->format; + memcpy(&key->stencil, &lp->depth_stencil->stencil, sizeof key->stencil); + } } key->alpha.enabled = lp->depth_stencil->alpha.enabled; @@ -1075,8 +1112,10 @@ make_variant_key(struct llvmpipe_context *lp, unsigned chan; format_desc = util_format_description(lp->framebuffer.cbufs[i]->format); - assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || - format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); + + key->blend.rt[i].colormask = lp->blend->rt[i].colormask; /* mask out color channels not present in the color buffer. 
* Should be simple to incorporate per-cbuf writemasks: @@ -1084,14 +1123,14 @@ make_variant_key(struct llvmpipe_context *lp, for(chan = 0; chan < 4; ++chan) { enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - if(swizzle <= UTIL_FORMAT_SWIZZLE_W) - key->blend.rt[0].colormask |= (1 << chan); + if(swizzle > UTIL_FORMAT_SWIZZLE_W) + key->blend.rt[i].colormask &= ~(1 << chan); } } for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) - lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]); + lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); } @@ -1137,6 +1176,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) opaque = !key.blend.logicop_enable && !key.blend.rt[0].blend_enable && key.blend.rt[0].colormask == 0xf && + !key.stencil[0].enabled && !key.alpha.enabled && !key.depth.enabled && !key.scissor && @@ -1144,7 +1184,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) ? TRUE : FALSE; lp_setup_set_fs_functions(lp->setup, - shader->current->jit_function[0], - shader->current->jit_function[1], + shader->current->jit_function[RAST_WHOLE], + shader->current->jit_function[RAST_EDGE_TEST], opaque); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 8592da0d9d..47f65fe72d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -69,7 +69,8 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) lp_setup_set_triangle_state( llvmpipe->setup, llvmpipe->rasterizer->cull_mode, llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW, - llvmpipe->rasterizer->scissor); + llvmpipe->rasterizer->scissor, + llvmpipe->rasterizer->gl_rasterization_rules); } llvmpipe->dirty |= LP_NEW_RASTERIZER; diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 
b30a075776..3552ff50ce 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -105,8 +105,9 @@ llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, void -llvmpipe_set_sampler_textures(struct pipe_context *pipe, - unsigned num, struct pipe_texture **texture) +llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); uint i; @@ -114,51 +115,79 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, assert(num <= PIPE_MAX_SAMPLERS); /* Check for no-op */ - if (num == llvmpipe->num_textures && - !memcmp(llvmpipe->texture, texture, num * sizeof(struct pipe_texture *))) + if (num == llvmpipe->num_fragment_sampler_views && + !memcmp(llvmpipe->fragment_sampler_views, views, num * sizeof(struct pipe_sampler_view *))) return; draw_flush(llvmpipe->draw); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - struct pipe_texture *tex = i < num ? texture[i] : NULL; + struct pipe_sampler_view *view = i < num ? 
views[i] : NULL; - pipe_texture_reference(&llvmpipe->texture[i], tex); + pipe_sampler_view_reference(&llvmpipe->fragment_sampler_views[i], view); } - llvmpipe->num_textures = num; + llvmpipe->num_fragment_sampler_views = num; - llvmpipe->dirty |= LP_NEW_TEXTURE; + llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW; } void -llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe, - unsigned num_textures, - struct pipe_texture **textures) +llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); uint i; - assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + assert(num <= PIPE_MAX_VERTEX_SAMPLERS); /* Check for no-op */ - if (num_textures == llvmpipe->num_vertex_textures && - !memcmp(llvmpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + if (num == llvmpipe->num_vertex_sampler_views && + !memcmp(llvmpipe->vertex_sampler_views, views, num * sizeof(struct pipe_sampler_view *))) { return; } draw_flush(llvmpipe->draw); for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - struct pipe_texture *tex = i < num_textures ? textures[i] : NULL; + struct pipe_sampler_view *view = i < num ? 
views[i] : NULL; - pipe_texture_reference(&llvmpipe->vertex_textures[i], tex); + pipe_sampler_view_reference(&llvmpipe->vertex_sampler_views[i], view); } - llvmpipe->num_vertex_textures = num_textures; + llvmpipe->num_vertex_sampler_views = num; - llvmpipe->dirty |= LP_NEW_TEXTURE; + llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW; +} + + +struct pipe_sampler_view * +llvmpipe_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ) +{ + struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); + + if (view) { + *view = *templ; + view->reference.count = 1; + view->texture = NULL; + pipe_resource_reference(&view->texture, texture); + view->context = pipe; + } + + return view; +} + + +void +llvmpipe_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + FREE(view); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 048ac5b968..7d86c5750c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -32,6 +32,7 @@ #include "util/u_inlines.h" #include "util/u_surface.h" #include "lp_context.h" +#include "lp_scene.h" #include "lp_state.h" #include "lp_setup.h" @@ -51,6 +52,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb); + assert(fb->width <= MAXWIDTH); + assert(fb->height <= MAXHEIGHT); + if (changed) { util_copy_framebuffer_state(&lp->framebuffer, fb); diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 57ac25ea0c..f6427aa908 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -35,24 +35,41 @@ #include "draw/draw_context.h" +void * +llvmpipe_create_vertex_elements_state(struct pipe_context *pipe, 
+ unsigned count, + const struct pipe_vertex_element *attribs) +{ + struct lp_velems_state *velems; + assert(count <= PIPE_MAX_ATTRIBS); + velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state)); + if (velems) { + velems->count = count; + memcpy(velems->velem, attribs, sizeof(*attribs) * count); + } + return velems; +} + void -llvmpipe_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *attribs) +llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe, + void *velems) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct lp_velems_state *lp_velems = (struct lp_velems_state *) velems; - assert(count <= PIPE_MAX_ATTRIBS); - - memcpy(llvmpipe->vertex_element, attribs, - count * sizeof(struct pipe_vertex_element)); - llvmpipe->num_vertex_elements = count; + llvmpipe->velems = lp_velems; llvmpipe->dirty |= LP_NEW_VERTEX; - draw_set_vertex_elements(llvmpipe->draw, count, attribs); + if (velems) + draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem); } +void +llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + FREE( velems ); +} void llvmpipe_set_vertex_buffers(struct pipe_context *pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 6110b0a193..1a116989d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -27,21 +27,124 @@ #include "util/u_rect.h" #include "lp_context.h" +#include "lp_flush.h" #include "lp_surface.h" +#include "lp_texture.h" +#include "lp_tile_size.h" + + +/** + * Adjust x, y, width, height to lie on tile bounds. 
+ */ +static void +adjust_to_tile_bounds(unsigned x, unsigned y, unsigned width, unsigned height, + unsigned *x_tile, unsigned *y_tile, + unsigned *w_tile, unsigned *h_tile) +{ + *x_tile = x & ~(TILE_SIZE - 1); + *y_tile = y & ~(TILE_SIZE - 1); + *w_tile = ((x + width + TILE_SIZE - 1) & ~(TILE_SIZE - 1)) - *x_tile; + *h_tile = ((y + height + TILE_SIZE - 1) & ~(TILE_SIZE - 1)) - *y_tile; +} + static void lp_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *dst, unsigned dstx, unsigned dsty, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - util_surface_copy(pipe, FALSE, - dest, destx, desty, - src, srcx, srcy, - width, height); + struct llvmpipe_resource *src_tex = llvmpipe_resource(src->texture); + struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst->texture); + const enum pipe_format format = src_tex->base.format; + + llvmpipe_flush_texture(pipe, + dst->texture, dst->face, dst->level, + 0, /* flush_flags */ + FALSE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_flush */ + + llvmpipe_flush_texture(pipe, + src->texture, src->face, src->level, + 0, /* flush_flags */ + TRUE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_flush */ + + /* + printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n", + src_tex->id, dst_tex->id, + srcx, srcy, dstx, dsty, width, height); + */ + + /* set src tiles to linear layout */ + { + unsigned tx, ty, tw, th; + unsigned x, y; + + adjust_to_tile_bounds(srcx, srcy, width, height, &tx, &ty, &tw, &th); + + for (y = 0; y < th; y += TILE_SIZE) { + for (x = 0; x < tw; x += TILE_SIZE) { + (void) llvmpipe_get_texture_tile_linear(src_tex, + src->face, src->level, + LP_TEX_USAGE_READ, + tx + x, ty + y); + } + } + } + + /* set dst tiles to linear layout */ + { + unsigned tx, ty, tw, th; + unsigned x, y; + enum lp_texture_usage usage; + + /* XXX for the tiles which are completely contained by 
the + * dest rectangle, we could set the usage mode to WRITE_ALL. + * Just test for the case of replacing the whole dest region for now. + */ + if (width == dst_tex->base.width0 && height == dst_tex->base.height0) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + adjust_to_tile_bounds(dstx, dsty, width, height, &tx, &ty, &tw, &th); + + for (y = 0; y < th; y += TILE_SIZE) { + for (x = 0; x < tw; x += TILE_SIZE) { + (void) llvmpipe_get_texture_tile_linear(dst_tex, + dst->face, dst->level, + usage, + tx + x, ty + y); + } + } + } + + /* copy */ + { + const ubyte *src_linear_ptr + = llvmpipe_get_texture_image_address(src_tex, src->face, + src->level, + LP_TEX_LAYOUT_LINEAR); + ubyte *dst_linear_ptr + = llvmpipe_get_texture_image_address(dst_tex, dst->face, + dst->level, + LP_TEX_LAYOUT_LINEAR); + + util_copy_rect(dst_linear_ptr, format, + llvmpipe_resource_stride(&dst_tex->base, dst->level), + dstx, dsty, + width, height, + src_linear_ptr, + llvmpipe_resource_stride(&src_tex->base, src->level), + srcx, srcy); + } } + void lp_init_surface_functions(struct llvmpipe_context *lp) { diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index a9b99945f9..338a04a487 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -41,7 +41,7 @@ #include <stdio.h> #include <float.h> -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" #include <llvm-c/Analysis.h> #include <llvm-c/ExecutionEngine.h> #include <llvm-c/Target.h> diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 5c9d418344..fae7bf3fcf 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -38,8 +38,8 @@ #include "gallivm/lp_bld_type.h" -#include "gallivm/lp_bld_blend.h" #include "gallivm/lp_bld_debug.h" +#include "lp_bld_blend.h" #include "lp_test.h" @@ -154,7 +154,6 @@ 
add_blend_test(LLVMModuleRef module, enum vector_mode mode, struct lp_type type) { - LLVMTypeRef ret_type; LLVMTypeRef vec_type; LLVMTypeRef args[4]; LLVMValueRef func; @@ -165,7 +164,6 @@ add_blend_test(LLVMModuleRef module, LLVMBasicBlockRef block; LLVMBuilderRef builder; - ret_type = LLVMInt64Type(); vec_type = lp_build_vec_type(type); args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index d05157991b..fbac815d10 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -28,68 +28,23 @@ #include <stdlib.h> #include <stdio.h> +#include <float.h> -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" #include <llvm-c/Analysis.h> -#include <llvm-c/ExecutionEngine.h> #include <llvm-c/Target.h> #include <llvm-c/Transforms/Scalar.h> -#include "util/u_cpu_detect.h" +#include "util/u_memory.h" #include "util/u_format.h" +#include "util/u_format_tests.h" +#include "util/u_format_s3tc.h" #include "gallivm/lp_bld_format.h" #include "lp_test.h" -struct pixel_test_case -{ - enum pipe_format format; - uint32_t packed; - double unpacked[4]; -}; - - -struct pixel_test_case test_cases[] = -{ - {PIPE_FORMAT_B5G6R5_UNORM, 0x0000, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_B5G6R5_UNORM, 0x001f, {0.0, 0.0, 1.0, 1.0}}, - {PIPE_FORMAT_B5G6R5_UNORM, 0x07e0, {0.0, 1.0, 0.0, 1.0}}, - {PIPE_FORMAT_B5G6R5_UNORM, 0xf800, {1.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_B5G6R5_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, - - {PIPE_FORMAT_B5G5R5A1_UNORM, 0x0000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_B5G5R5A1_UNORM, 0x001f, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_B5G5R5A1_UNORM, 0x03e0, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_B5G5R5A1_UNORM, 0x7c00, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_B5G5R5A1_UNORM, 0x8000, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_B5G5R5A1_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, - - 
{PIPE_FORMAT_B8G8R8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x000000ff, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x0000ff00, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00ff0000, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0xff000000, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, - -#if 0 - {PIPE_FORMAT_R8G8B8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_R8G8B8A8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_R8G8B8A8_UNORM, 0x0000ff00, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_R8G8B8A8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_R8G8B8A8_UNORM, 0xff000000, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_R8G8B8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, -#endif - - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x0000ff00, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0xff000000, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, -}; - - void write_tsv_header(FILE *fp) { @@ -114,83 +69,43 @@ write_tsv_row(FILE *fp, } -typedef void (*load_ptr_t)(const uint32_t packed, float *); +typedef void +(*fetch_ptr_t)(float *, const void *packed, + unsigned i, unsigned j); static LLVMValueRef -add_load_rgba_test(LLVMModuleRef module, - const struct util_format_description *desc) -{ - LLVMTypeRef args[2]; - LLVMValueRef func; - LLVMValueRef packed; - LLVMValueRef rgba_ptr; - LLVMBasicBlockRef block; - LLVMBuilderRef builder; - LLVMValueRef rgba; - - args[0] = LLVMInt32Type(); - args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); - - func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); - LLVMSetFunctionCallConv(func, LLVMCCallConv); - packed = LLVMGetParam(func, 0); - rgba_ptr 
= LLVMGetParam(func, 1); - - block = LLVMAppendBasicBlock(func, "entry"); - builder = LLVMCreateBuilder(); - LLVMPositionBuilderAtEnd(builder, block); - - if(desc->block.bits < 32) - packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), ""); - - rgba = lp_build_unpack_rgba_aos(builder, desc, packed); - - LLVMBuildStore(builder, rgba, rgba_ptr); - - LLVMBuildRetVoid(builder); - - LLVMDisposeBuilder(builder); - return func; -} - - -typedef void (*store_ptr_t)(uint32_t *, const float *); - - -static LLVMValueRef -add_store_rgba_test(LLVMModuleRef module, +add_fetch_rgba_test(LLVMModuleRef lp_build_module, const struct util_format_description *desc) { - LLVMTypeRef args[2]; + LLVMTypeRef args[4]; LLVMValueRef func; LLVMValueRef packed_ptr; LLVMValueRef rgba_ptr; + LLVMValueRef i; + LLVMValueRef j; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; - LLVMValueRef packed; - args[0] = LLVMPointerType(LLVMInt32Type(), 0); - args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); + args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); + args[1] = LLVMPointerType(LLVMInt8Type(), 0); + args[3] = args[2] = LLVMInt32Type(); - func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); + func = LLVMAddFunction(lp_build_module, "fetch", LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - packed_ptr = LLVMGetParam(func, 0); - rgba_ptr = LLVMGetParam(func, 1); + rgba_ptr = LLVMGetParam(func, 0); + packed_ptr = LLVMGetParam(func, 1); + i = LLVMGetParam(func, 2); + j = LLVMGetParam(func, 3); block = LLVMAppendBasicBlock(func, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - - packed = lp_build_pack_rgba_aos(builder, desc, rgba); - - if(desc->block.bits < 32) - packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); + rgba = lp_build_fetch_rgba_aos(builder, 
desc, packed_ptr, i, j); - LLVMBuildStore(builder, packed, packed_ptr); + LLVMBuildStore(builder, rgba, rgba_ptr); LLVMBuildRetVoid(builder); @@ -201,47 +116,27 @@ add_store_rgba_test(LLVMModuleRef module, PIPE_ALIGN_STACK static boolean -test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) +test_format(unsigned verbose, FILE *fp, + const struct util_format_description *desc, + const struct util_format_test_case *test) { - LLVMModuleRef module = NULL; - LLVMValueRef load = NULL; - LLVMValueRef store = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMValueRef fetch = NULL; LLVMPassManagerRef pass = NULL; - char *error = NULL; - const struct util_format_description *desc; - load_ptr_t load_ptr; - store_ptr_t store_ptr; + fetch_ptr_t fetch_ptr; float unpacked[4]; - unsigned packed; boolean success; unsigned i; - desc = util_format_description(test->format); - fprintf(stderr, "%s\n", desc->name); - - module = LLVMModuleCreateWithName("test"); - - load = add_load_rgba_test(module, desc); - store = add_store_rgba_test(module, desc); - - if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { - LLVMDumpModule(module); - abort(); - } - LLVMDisposeMessage(error); + fetch = add_fetch_rgba_test(lp_build_module, desc); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); + if (LLVMVerifyFunction(fetch, LLVMPrintMessageAction)) { + LLVMDumpValue(fetch); abort(); } #if 0 pass = LLVMCreatePassManager(); - LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); + LLVMAddTargetData(LLVMGetExecutionEngineTargetData(lp_build_engine), pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. 
*/ LLVMAddConstantPropagationPass(pass); @@ -249,40 +144,39 @@ test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) LLVMAddPromoteMemoryToRegisterPass(pass); LLVMAddGVNPass(pass); LLVMAddCFGSimplificationPass(pass); - LLVMRunPassManager(pass, module); + LLVMRunPassManager(pass, lp_build_module); #else (void)pass; #endif - load_ptr = (load_ptr_t) LLVMGetPointerToGlobal(engine, load); - store_ptr = (store_ptr_t)LLVMGetPointerToGlobal(engine, store); + fetch_ptr = (fetch_ptr_t) LLVMGetPointerToGlobal(lp_build_engine, fetch); memset(unpacked, 0, sizeof unpacked); - packed = 0; - load_ptr(test->packed, unpacked); - store_ptr(&packed, unpacked); + fetch_ptr(unpacked, test->packed, 0, 0); success = TRUE; - if(test->packed != packed) - success = FALSE; for(i = 0; i < 4; ++i) - if(test->unpacked[i] != unpacked[i]) + if (fabs((float)test->unpacked[0][0][i] - unpacked[i]) > FLT_EPSILON) success = FALSE; if (!success) { printf("FAILED\n"); - printf(" Packed: %08x\n", test->packed); - printf(" %08x\n", packed); - printf(" Unpacked: %f %f %f %f\n", unpacked[0], unpacked[1], unpacked[2], unpacked[3]); - printf(" %f %f %f %f\n", test->unpacked[0], test->unpacked[1], test->unpacked[2], test->unpacked[3]); - LLVMDumpModule(module); + printf(" Packed: %02x %02x %02x %02x\n", + test->packed[0], test->packed[1], test->packed[2], test->packed[3]); + printf(" Unpacked: %f %f %f %f obtained\n", + unpacked[0], unpacked[1], unpacked[2], unpacked[3]); + printf(" %f %f %f %f expected\n", + test->unpacked[0][0][0], + test->unpacked[0][0][1], + test->unpacked[0][0][2], + test->unpacked[0][0][3]); + LLVMDumpValue(fetch); } - LLVMFreeMachineCodeForFunction(engine, store); - LLVMFreeMachineCodeForFunction(engine, load); + LLVMFreeMachineCodeForFunction(lp_build_engine, fetch); + LLVMDeleteFunction(fetch); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); @@ -293,15 +187,66 @@ test_format(unsigned verbose, FILE *fp, const struct pixel_test_case 
*test) } + +static boolean +test_one(unsigned verbose, FILE *fp, + const struct util_format_description *format_desc) +{ + unsigned i; + bool success = TRUE; + + printf("Testing %s ...\n", + format_desc->name); + + for (i = 0; i < util_format_nr_test_cases; ++i) { + const struct util_format_test_case *test = &util_format_test_cases[i]; + + if (test->format == format_desc->format) { + + if (!test_format(verbose, fp, format_desc, test)) { + success = FALSE; + } + + } + } + + return success; +} + + boolean test_all(unsigned verbose, FILE *fp) { - unsigned i; + enum pipe_format format; bool success = TRUE; - for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) - if(!test_format(verbose, fp, &test_cases[i])) - success = FALSE; + for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { + const struct util_format_description *format_desc; + + format_desc = util_format_description(format); + if (!format_desc) { + continue; + } + + /* + * TODO: test more + */ + + if (format_desc->block.width != 1 || + format_desc->block.height != 1 || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + continue; + } + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && + !util_format_s3tc_enabled) { + continue; + } + + if (!test_one(verbose, fp, format_desc)) { + success = FALSE; + } + } return success; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c new file mode 100644 index 0000000000..e5e5925012 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -0,0 +1,162 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include <stdlib.h> +#include <stdio.h> + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_printf.h" + +#include <llvm-c/Analysis.h> +#include <llvm-c/ExecutionEngine.h> +#include <llvm-c/Target.h> +#include <llvm-c/Transforms/Scalar.h> + +#include "lp_test.h" + + +struct printf_test_case { +}; + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + + +typedef void (*test_printf_t)(int i); + +static LLVMValueRef +add_printf_test(LLVMModuleRef module) +{ + LLVMTypeRef args[1] = { LLVMIntType(32) }; + LLVMValueRef func = LLVMAddFunction(module, "test_printf", LLVMFunctionType(LLVMVoidType(), args, 1, 0)); + LLVMBuilderRef builder = LLVMCreateBuilder(); + LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry"); + + LLVMSetFunctionCallConv(func, LLVMCCallConv); + + LLVMPositionBuilderAtEnd(builder, block); + lp_build_printf(builder, "hello, world\n"); + lp_build_printf(builder, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32Type(), 5, 0), + LLVMConstInt(LLVMInt32Type(), 6, 0)); + LLVMBuildRetVoid(builder); + LLVMDisposeBuilder(builder); + return func; +} + + +PIPE_ALIGN_STACK +static boolean +test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) +{ + LLVMModuleRef module = NULL; + LLVMValueRef test = NULL; + LLVMExecutionEngineRef engine = NULL; + LLVMModuleProviderRef provider = NULL; + LLVMPassManagerRef pass = NULL; + char *error = NULL; + test_printf_t test_printf; + float unpacked[4]; + unsigned packed; + boolean success = TRUE; + + module = LLVMModuleCreateWithName("test"); + + test = add_printf_test(module); + + if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { + LLVMDumpModule(module); + abort(); + } + LLVMDisposeMessage(error); + + provider = LLVMCreateModuleProviderForExistingModule(module); + if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { + fprintf(stderr, "%s\n", 
error); + LLVMDisposeMessage(error); + abort(); + } + +#if 0 + pass = LLVMCreatePassManager(); + LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + LLVMAddConstantPropagationPass(pass); + LLVMAddInstructionCombiningPass(pass); + LLVMAddPromoteMemoryToRegisterPass(pass); + LLVMAddGVNPass(pass); + LLVMAddCFGSimplificationPass(pass); + LLVMRunPassManager(pass, module); +#else + (void)pass; +#endif + + test_printf = (test_printf_t)LLVMGetPointerToGlobal(engine, test); + + memset(unpacked, 0, sizeof unpacked); + packed = 0; + + + // LLVMDumpModule(module); + + test_printf(0); + + LLVMFreeMachineCodeForFunction(engine, test); + + LLVMDisposeExecutionEngine(engine); + if(pass) + LLVMDisposePassManager(pass); + + return success; +} + + +boolean +test_all(unsigned verbose, FILE *fp) +{ + bool success = TRUE; + + test_printf(verbose, fp, NULL); + + return success; +} + + +boolean +test_some(unsigned verbose, FILE *fp, unsigned long n) +{ + return test_all(verbose, fp); +} diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index cb59a94464..1228a831f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -29,7 +29,7 @@ #define LP_TEX_SAMPLE_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" struct lp_sampler_static_state; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index 632462460a..74b7393e4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -51,10 +51,11 @@ /** - * This provides the bridge between the sampler state store in lp_jit_context - * and lp_jit_texture and the sampler code generator. 
It provides the - * texture layout information required by the texture sampler code generator - * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime. + * This provides the bridge between the sampler state store in + * lp_jit_context and lp_jit_texture and the sampler code + * generator. It provides the texture layout information required by + * the texture sampler code generator in terms of the state stored in + * lp_jit_context and lp_jit_texture in runtime. */ struct llvmpipe_sampler_dynamic_state { @@ -79,6 +80,9 @@ struct lp_llvm_sampler_soa /** * Fetch the specified member of the lp_jit_texture structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. * * @sa http://llvm.org/docs/GetElementPtr.html */ @@ -87,9 +91,11 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, LLVMBuilderRef builder, unsigned unit, unsigned member_index, - const char *member_name) + const char *member_name, + boolean emit_load) { - struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + struct llvmpipe_sampler_dynamic_state *state = + (struct llvmpipe_sampler_dynamic_state *)base; LLVMValueRef indices[4]; LLVMValueRef ptr; LLVMValueRef res; @@ -99,7 +105,7 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, /* context[0] */ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); /* context[0].textures */ - indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CTX_TEXTURES, 0); /* context[0].textures[unit] */ indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); /* context[0].textures[unit].member */ @@ -107,7 +113,10 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); - res = LLVMBuildLoad(builder, ptr, ""); + if 
(emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; lp_build_name(res, "context.texture%u.%s", unit, member_name); @@ -116,28 +125,31 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, /** - * Helper macro to instantiate the functions that generate the code to fetch - * the members of lp_jit_texture to fulfill the sampler code generator requests. + * Helper macro to instantiate the functions that generate the code to + * fetch the members of lp_jit_texture to fulfill the sampler code + * generator requests. * - * This complexity is the price we have to pay to keep the texture sampler code - * generator a reusable module without dependencies to llvmpipe internals. + * This complexity is the price we have to pay to keep the texture + * sampler code generator a reusable module without dependencies to + * llvmpipe internals. */ -#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \ static LLVMValueRef \ lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ LLVMBuilderRef builder, \ unsigned unit) \ { \ - return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \ + return lp_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \ } -LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH) -LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT) -LP_LLVM_TEXTURE_MEMBER(depth, LP_JIT_TEXTURE_DEPTH) -LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL) -LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE) -LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH, TRUE) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT, TRUE) +LP_LLVM_TEXTURE_MEMBER(depth, LP_JIT_TEXTURE_DEPTH, TRUE) +LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE) +LP_LLVM_TEXTURE_MEMBER(row_stride, LP_JIT_TEXTURE_ROW_STRIDE, FALSE) +LP_LLVM_TEXTURE_MEMBER(img_stride, LP_JIT_TEXTURE_IMG_STRIDE, FALSE) 
+LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA, FALSE) static void @@ -193,7 +205,8 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, sampler->dynamic_state.base.height = lp_llvm_texture_height; sampler->dynamic_state.base.depth = lp_llvm_texture_depth; sampler->dynamic_state.base.last_level = lp_llvm_texture_last_level; - sampler->dynamic_state.base.stride = lp_llvm_texture_stride; + sampler->dynamic_state.base.row_stride = lp_llvm_texture_row_stride; + sampler->dynamic_state.base.img_stride = lp_llvm_texture_img_stride; sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; sampler->dynamic_state.static_state = static_state; sampler->dynamic_state.context_ptr = context_ptr; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 7f45635542..336b487bd4 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -30,182 +30,422 @@ * Michel Dänzer <michel@tungstengraphics.com> */ +#include <stdio.h> + #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "util/u_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "lp_context.h" +#include "lp_flush.h" #include "lp_screen.h" +#include "lp_tile_image.h" #include "lp_texture.h" +#include "lp_setup.h" #include "lp_tile_size.h" -#include "lp_winsys.h" + +#include "state_tracker/sw_winsys.h" + + +static INLINE boolean +resource_is_texture(const struct pipe_resource *resource) +{ + switch (resource->target) { + case PIPE_BUFFER: + return FALSE; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_CUBE: + return TRUE; + default: + assert(0); + return FALSE; + } +} + + + +/** + * Allocate storage for llvmpipe_texture::layout array. + * The number of elements is width_in_tiles * height_in_tiles. 
+ */ +static enum lp_texture_layout * +alloc_layout_array(unsigned num_slices, unsigned width, unsigned height) +{ + const unsigned tx = align(width, TILE_SIZE) / TILE_SIZE; + const unsigned ty = align(height, TILE_SIZE) / TILE_SIZE; + + assert(num_slices * tx * ty > 0); + assert(LP_TEX_LAYOUT_NONE == 0); /* calloc'ing LP_TEX_LAYOUT_NONE here */ + + return (enum lp_texture_layout *) + CALLOC(num_slices * tx * ty, sizeof(enum lp_texture_layout)); +} + /** * Conventional allocation path for non-display textures: - * Simple, maximally packed layout. + * Just compute row strides here. Storage is allocated on demand later. */ static boolean llvmpipe_texture_layout(struct llvmpipe_screen *screen, - struct llvmpipe_texture *lpt) + struct llvmpipe_resource *lpr) { - struct pipe_texture *pt = &lpt->base; + struct pipe_resource *pt = &lpr->base; unsigned level; unsigned width = pt->width0; unsigned height = pt->height0; unsigned depth = pt->depth0; - unsigned buffer_size = 0; + + assert(LP_MAX_TEXTURE_2D_LEVELS <= LP_MAX_TEXTURE_LEVELS); + assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS); for (level = 0; level <= pt->last_level; level++) { - unsigned nblocksx, nblocksy; + const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE; + const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE; + unsigned nblocksx, num_slices; + + if (lpr->base.target == PIPE_TEXTURE_CUBE) + num_slices = 6; + else if (lpr->base.target == PIPE_TEXTURE_3D) + num_slices = depth; + else + num_slices = 1; /* Allocate storage for whole quads. This is particularly important * for depth surfaces, which are currently stored in a swizzled format. 
*/ nblocksx = util_format_get_nblocksx(pt->format, align(width, TILE_SIZE)); - nblocksy = util_format_get_nblocksy(pt->format, align(height, TILE_SIZE)); - lpt->stride[level] = align(nblocksx * util_format_get_blocksize(pt->format), 16); + lpr->row_stride[level] = + align(nblocksx * util_format_get_blocksize(pt->format), 16); - lpt->level_offset[level] = buffer_size; + lpr->img_stride[level] = lpr->row_stride[level] * align(height, TILE_SIZE); - buffer_size += (nblocksy * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - lpt->stride[level]); + lpr->tiles_per_row[level] = width_t; + lpr->tiles_per_image[level] = width_t * height_t; + lpr->num_slices_faces[level] = num_slices; + lpr->layout[level] = alloc_layout_array(num_slices, width, height); width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); } - lpt->data = align_malloc(buffer_size, 16); - - return lpt->data != NULL; + return TRUE; } static boolean llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, - struct llvmpipe_texture *lpt) + struct llvmpipe_resource *lpr) { - struct llvmpipe_winsys *winsys = screen->winsys; + struct sw_winsys *winsys = screen->winsys; /* Round up the surface size to a multiple of the tile size to * avoid tile clipping. 
*/ - unsigned width = align(lpt->base.width0, TILE_SIZE); - unsigned height = align(lpt->base.height0, TILE_SIZE); - - lpt->dt = winsys->displaytarget_create(winsys, - lpt->base.format, + const unsigned width = align(lpr->base.width0, TILE_SIZE); + const unsigned height = align(lpr->base.height0, TILE_SIZE); + const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE; + const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE; + + lpr->tiles_per_row[0] = width_t; + lpr->tiles_per_image[0] = width_t * height_t; + lpr->num_slices_faces[0] = 1; + lpr->img_stride[0] = 0; + + lpr->layout[0] = alloc_layout_array(1, width, height); + //lpr->layout[0][0] = LP_TEX_LAYOUT_LINEAR; + + lpr->dt = winsys->displaytarget_create(winsys, + lpr->base.bind, + lpr->base.format, width, height, 16, - &lpt->stride[0] ); + &lpr->row_stride[0] ); - return lpt->dt != NULL; + return lpr->dt != NULL; } -static struct pipe_texture * -llvmpipe_texture_create(struct pipe_screen *_screen, - const struct pipe_texture *templat) +static struct pipe_resource * +llvmpipe_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) { + static unsigned id_counter = 0; struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - struct llvmpipe_texture *lpt = CALLOC_STRUCT(llvmpipe_texture); - if (!lpt) + struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource); + if (!lpr) return NULL; - lpt->base = *templat; - pipe_reference_init(&lpt->base.reference, 1); - lpt->base.screen = &screen->base; + lpr->base = *templat; + pipe_reference_init(&lpr->base.reference, 1); + lpr->base.screen = &screen->base; - if (lpt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY)) { - if (!llvmpipe_displaytarget_layout(screen, lpt)) - goto fail; + assert(lpr->base.bind); + + if (resource_is_texture(&lpr->base)) { + if (lpr->base.bind & PIPE_BIND_DISPLAY_TARGET) { + /* displayable surface */ + if (!llvmpipe_displaytarget_layout(screen, lpr)) + goto fail; 
+ assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); + } + else { + /* texture map */ + if (!llvmpipe_texture_layout(screen, lpr)) + goto fail; + assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); + } + assert(lpr->layout[0]); } else { - if (!llvmpipe_texture_layout(screen, lpt)) + /* other data (vertex buffer, const buffer, etc) */ + const enum pipe_format format = templat->format; + const uint w = templat->width0 / util_format_get_blockheight(format); + const uint h = templat->height0 / util_format_get_blockwidth(format); + const uint d = templat->depth0; + const uint bpp = util_format_get_blocksize(format); + const uint bytes = w * h * d * bpp; + lpr->data = align_malloc(bytes, 16); + if (!lpr->data) goto fail; } - - return &lpt->base; + + lpr->id = id_counter++; + + return &lpr->base; fail: - FREE(lpt); + FREE(lpr); return NULL; } -static struct pipe_texture * -llvmpipe_texture_blanket(struct pipe_screen * screen, - const struct pipe_texture *base, - const unsigned *stride, - struct pipe_buffer *buffer) +static void +llvmpipe_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *pt) { - /* FIXME */ -#if 0 - struct llvmpipe_texture *lpt; - assert(screen); + struct llvmpipe_screen *screen = llvmpipe_screen(pscreen); + struct llvmpipe_resource *lpr = llvmpipe_resource(pt); - /* Only supports one type */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth0 != 1) { - return NULL; + if (lpr->dt) { + /* display target */ + struct sw_winsys *winsys = screen->winsys; + winsys->displaytarget_destroy(winsys, lpr->dt); } + else if (resource_is_texture(pt)) { + /* regular texture */ + uint level; + + /* free linear image data */ + for (level = 0; level < Elements(lpr->linear); level++) { + if (lpr->linear[level].data) { + align_free(lpr->linear[level].data); + lpr->linear[level].data = NULL; + } + } - lpt = CALLOC_STRUCT(llvmpipe_texture); - if (!lpt) - return NULL; - - lpt->base = *base; - pipe_reference_init(&lpt->base.reference, 
1); - lpt->base.screen = screen; - lpt->stride[0] = stride[0]; + /* free tiled image data */ + for (level = 0; level < Elements(lpr->tiled); level++) { + if (lpr->tiled[level].data) { + align_free(lpr->tiled[level].data); + lpr->tiled[level].data = NULL; + } + } - pipe_buffer_reference(&lpt->buffer, buffer); + /* free layout flag arrays */ + for (level = 0; level < Elements(lpr->tiled); level++) { + FREE(lpr->layout[level]); + lpr->layout[level] = NULL; + } + } + else if (!lpr->userBuffer) { + assert(lpr->data); + align_free(lpr->data); + } - return &lpt->base; -#else - debug_printf("llvmpipe_texture_blanket() not implemented!"); - return NULL; -#endif + FREE(lpr); } -static void -llvmpipe_texture_destroy(struct pipe_texture *pt) +/** + * Map a resource for read/write. + */ +void * +llvmpipe_resource_map(struct pipe_resource *resource, + unsigned face, + unsigned level, + unsigned zslice, + enum lp_texture_usage tex_usage, + enum lp_texture_layout layout) { - struct llvmpipe_screen *screen = llvmpipe_screen(pt->screen); - struct llvmpipe_texture *lpt = llvmpipe_texture(pt); + struct llvmpipe_resource *lpr = llvmpipe_resource(resource); + uint8_t *map; + + assert(face < 6); + assert(level < LP_MAX_TEXTURE_LEVELS); + + assert(tex_usage == LP_TEX_USAGE_READ || + tex_usage == LP_TEX_USAGE_READ_WRITE || + tex_usage == LP_TEX_USAGE_WRITE_ALL); + + assert(layout == LP_TEX_LAYOUT_NONE || + layout == LP_TEX_LAYOUT_TILED || + layout == LP_TEX_LAYOUT_LINEAR); - if (lpt->dt) { + if (lpr->dt) { /* display target */ - struct llvmpipe_winsys *winsys = screen->winsys; - winsys->displaytarget_destroy(winsys, lpt->dt); + struct llvmpipe_screen *screen = llvmpipe_screen(resource->screen); + struct sw_winsys *winsys = screen->winsys; + unsigned dt_usage; + uint8_t *map2; + + if (tex_usage == LP_TEX_USAGE_READ) { + dt_usage = PIPE_TRANSFER_READ; + } + else { + dt_usage = PIPE_TRANSFER_READ_WRITE; + } + + assert(face == 0); + assert(level == 0); + assert(zslice == 0); + + /* FIXME: keep 
map count? */ + map = winsys->displaytarget_map(winsys, lpr->dt, dt_usage); + + /* install this linear image in texture data structure */ + lpr->linear[level].data = map; + + /* make sure tiled data gets converted to linear data */ + map2 = llvmpipe_get_texture_image(lpr, 0, 0, tex_usage, layout); + if (layout == LP_TEX_LAYOUT_LINEAR) + assert(map == map2); + + return map2; } - else { + else if (resource_is_texture(resource)) { /* regular texture */ - align_free(lpt->data); + if (resource->target != PIPE_TEXTURE_CUBE) { + assert(face == 0); + } + if (resource->target != PIPE_TEXTURE_3D) { + assert(zslice == 0); + } + + map = llvmpipe_get_texture_image(lpr, face + zslice, level, + tex_usage, layout); + assert(map); + return map; + } + else { + return lpr->data; + } +} + + +/** + * Unmap a resource. + */ +void +llvmpipe_resource_unmap(struct pipe_resource *resource, + unsigned face, + unsigned level, + unsigned zslice) +{ + struct llvmpipe_resource *lpr = llvmpipe_resource(resource); + + if (lpr->dt) { + /* display target */ + struct llvmpipe_screen *lp_screen = llvmpipe_screen(resource->screen); + struct sw_winsys *winsys = lp_screen->winsys; + + assert(face == 0); + assert(level == 0); + assert(zslice == 0); + + /* make sure linear image is up to date */ + (void) llvmpipe_get_texture_image(lpr, face + zslice, level, + LP_TEX_USAGE_READ, + LP_TEX_LAYOUT_LINEAR); + + winsys->displaytarget_unmap(winsys, lpr->dt); } +} + + +void * +llvmpipe_resource_data(struct pipe_resource *resource) +{ + struct llvmpipe_resource *lpr = llvmpipe_resource(resource); + + assert(!resource_is_texture(resource)); + + return lpr->data; +} + + +static struct pipe_resource * +llvmpipe_resource_from_handle(struct pipe_screen *screen, + const struct pipe_resource *template, + struct winsys_handle *whandle) +{ + struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys; + struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource); + if (!lpr) + return NULL; + + lpr->base = *template; + 
pipe_reference_init(&lpr->base.reference, 1); + lpr->base.screen = screen; + + lpr->dt = winsys->displaytarget_from_handle(winsys, + template, + whandle, + &lpr->row_stride[0]); + if (!lpr->dt) + goto fail; + + return &lpr->base; + + fail: + FREE(lpr); + return NULL; +} + + +static boolean +llvmpipe_resource_get_handle(struct pipe_screen *screen, + struct pipe_resource *pt, + struct winsys_handle *whandle) +{ + struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys; + struct llvmpipe_resource *lpr = llvmpipe_resource(pt); - FREE(lpt); + assert(lpr->dt); + if (!lpr->dt) + return FALSE; + + return winsys->displaytarget_get_handle(winsys, lpr->dt, whandle); } static struct pipe_surface * llvmpipe_get_tex_surface(struct pipe_screen *screen, - struct pipe_texture *pt, + struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, - unsigned usage) + enum lp_texture_usage usage) { - struct llvmpipe_texture *lpt = llvmpipe_texture(pt); struct pipe_surface *ps; assert(level <= pt->last_level); @@ -213,53 +453,15 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, ps = CALLOC_STRUCT(pipe_surface); if (ps) { pipe_reference_init(&ps->reference, 1); - pipe_texture_reference(&ps->texture, pt); + pipe_resource_reference(&ps->texture, pt); ps->format = pt->format; ps->width = u_minify(pt->width0, level); ps->height = u_minify(pt->height0, level); - ps->offset = lpt->level_offset[level]; ps->usage = usage; - /* Because we are llvmpipe, anything that the state tracker - * thought was going to be done with the GPU will actually get - * done with the CPU. Let's adjust the flags to take that into - * account. 
- */ - if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) { - /* GPU_WRITE means "render" and that can involve reads (blending) */ - ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_CPU_READ; - } - - if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ) - ps->usage |= PIPE_BUFFER_USAGE_CPU_READ; - - if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_GPU_WRITE)) { - /* Mark the surface as dirty. */ - lpt->timestamp++; - llvmpipe_screen(screen)->timestamp++; - } - ps->face = face; ps->level = level; ps->zslice = zslice; - - /* XXX shouldn't that rather be - tex_height = align(ps->height, 2); - to account for alignment done in llvmpipe_texture_layout ? - */ - if (pt->target == PIPE_TEXTURE_CUBE) { - unsigned tex_height = ps->height; - ps->offset += face * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - unsigned tex_height = ps->height; - ps->offset += zslice * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; - } - else { - assert(face == 0); - assert(zslice == 0); - } } return ps; } @@ -273,56 +475,33 @@ llvmpipe_tex_surface_destroy(struct pipe_surface *surf) * where it would happen. For llvmpipe, nothing to do. 
*/ assert(surf->texture); - pipe_texture_reference(&surf->texture, NULL); + pipe_resource_reference(&surf->texture, NULL); FREE(surf); } static struct pipe_transfer * -llvmpipe_get_tex_transfer(struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct llvmpipe_texture *lptex = llvmpipe_texture(texture); - struct llvmpipe_transfer *lpt; - - assert(texture); - assert(level <= texture->last_level); - - lpt = CALLOC_STRUCT(llvmpipe_transfer); - if (lpt) { - struct pipe_transfer *pt = &lpt->base; - pipe_texture_reference(&pt->texture, texture); - pt->x = x; - pt->y = y; - pt->width = align(w, TILE_SIZE); - pt->height = align(h, TILE_SIZE); - pt->stride = lptex->stride[level]; +llvmpipe_get_transfer(struct pipe_context *pipe, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) +{ + struct llvmpipe_resource *lprex = llvmpipe_resource(resource); + struct llvmpipe_transfer *lpr; + + assert(resource); + assert(sr.level <= resource->last_level); + + lpr = CALLOC_STRUCT(llvmpipe_transfer); + if (lpr) { + struct pipe_transfer *pt = &lpr->base; + pipe_resource_reference(&pt->resource, resource); + pt->box = *box; + pt->sr = sr; + pt->stride = lprex->row_stride[sr.level]; pt->usage = usage; - pt->face = face; - pt->level = level; - pt->zslice = zslice; - - lpt->offset = lptex->level_offset[level]; - - /* XXX shouldn't that rather be - tex_height = align(u_minify(texture->height0, level), 2) - to account for alignment done in llvmpipe_texture_layout ? 
- */ - if (texture->target == PIPE_TEXTURE_CUBE) { - unsigned tex_height = u_minify(texture->height0, level); - lpt->offset += face * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; - } - else if (texture->target == PIPE_TEXTURE_3D) { - unsigned tex_height = u_minify(texture->height0, level); - lpt->offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; - } - else { - assert(face == 0); - assert(zslice == 0); - } + return pt; } return NULL; @@ -330,92 +509,673 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, static void -llvmpipe_tex_transfer_destroy(struct pipe_transfer *transfer) +llvmpipe_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) { /* Effectively do the texture_update work here - if texture images * needed post-processing to put them into hardware layout, this is * where it would happen. For llvmpipe, nothing to do. */ - assert (transfer->texture); - pipe_texture_reference(&transfer->texture, NULL); + assert (transfer->resource); + pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } static void * -llvmpipe_transfer_map( struct pipe_screen *_screen, +llvmpipe_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { - struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - ubyte *map, *xfer_map; - struct llvmpipe_texture *lpt; + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + ubyte *map; + struct llvmpipe_resource *lpr; enum pipe_format format; - - assert(transfer->texture); - lpt = llvmpipe_texture(transfer->texture); - format = lpt->base.format; - - if (lpt->dt) { - /* display target */ - struct llvmpipe_winsys *winsys = screen->winsys; - - map = winsys->displaytarget_map(winsys, lpt->dt, - pipe_transfer_buffer_flags(transfer)); - if (map == NULL) - return NULL; + enum lp_texture_usage tex_usage; + const char *mode; + + assert(transfer->sr.face < 6); + assert(transfer->sr.level < LP_MAX_TEXTURE_LEVELS); + + 
/* + printf("tex_transfer_map(%d, %d %d x %d of %d x %d, usage %d )\n", + transfer->x, transfer->y, transfer->width, transfer->height, + transfer->texture->width0, + transfer->texture->height0, + transfer->usage); + */ + + if (transfer->usage == PIPE_TRANSFER_READ) { + tex_usage = LP_TEX_USAGE_READ; + mode = "read"; } else { - /* regular texture */ - map = lpt->data; + tex_usage = LP_TEX_USAGE_READ_WRITE; + mode = "read/write"; } - /* May want to different things here depending on read/write nature + if (0) { + struct llvmpipe_resource *lpr = llvmpipe_resource(transfer->resource); + printf("transfer map tex %u mode %s\n", lpr->id, mode); + } + + + assert(transfer->resource); + lpr = llvmpipe_resource(transfer->resource); + format = lpr->base.format; + + /* + * Transfers, like other pipe operations, must happen in order, so flush the + * context if necessary. + */ + llvmpipe_flush_texture(pipe, + transfer->resource, + transfer->sr.face, + transfer->sr.level, + 0, /* flush_flags */ + !(transfer->usage & PIPE_TRANSFER_WRITE), /* read_only */ + TRUE, /* cpu_access */ + FALSE); /* do_not_flush */ + + map = llvmpipe_resource_map(transfer->resource, + transfer->sr.face, + transfer->sr.level, + transfer->box.z, + tex_usage, LP_TEX_LAYOUT_LINEAR); + + + /* May want to do different things here depending on read/write nature * of the map: */ - if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { /* Do something to notify sharing contexts of a texture change. 
*/ screen->timestamp++; } - xfer_map = map + llvmpipe_transfer(transfer)->offset + - transfer->y / util_format_get_blockheight(format) * transfer->stride + - transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); - /*printf("map = %p xfer map = %p\n", map, xfer_map);*/ - return xfer_map; + map += + transfer->box.y / util_format_get_blockheight(format) * transfer->stride + + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); + + return map; } static void -llvmpipe_transfer_unmap(struct pipe_screen *screen, - struct pipe_transfer *transfer) +llvmpipe_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *transfer) { - struct llvmpipe_screen *lp_screen = llvmpipe_screen(screen); - struct llvmpipe_texture *lpt; + assert(transfer->resource); - assert(transfer->texture); - lpt = llvmpipe_texture(transfer->texture); + llvmpipe_resource_unmap(transfer->resource, + transfer->sr.face, + transfer->sr.level, + transfer->box.z); +} - if (lpt->dt) { - /* display target */ - struct llvmpipe_winsys *winsys = lp_screen->winsys; - winsys->displaytarget_unmap(winsys, lpt->dt); +static unsigned int +llvmpipe_is_resource_referenced( struct pipe_context *pipe, + struct pipe_resource *presource, + unsigned face, unsigned level) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); + + if (presource->target == PIPE_BUFFER) + return PIPE_UNREFERENCED; + + return lp_setup_is_resource_referenced(llvmpipe->setup, presource); +} + + + +/** + * Create buffer which wraps user-space data. + */ +static struct pipe_resource * +llvmpipe_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes, + unsigned bind_flags) +{ + struct llvmpipe_resource *buffer; + + buffer = CALLOC_STRUCT(llvmpipe_resource); + if(!buffer) + return NULL; + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.screen = screen; + buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? 
*/ + buffer->base.bind = bind_flags; + buffer->base.usage = PIPE_USAGE_IMMUTABLE; + buffer->base.flags = 0; + buffer->base.width0 = bytes; + buffer->base.height0 = 1; + buffer->base.depth0 = 1; + buffer->userBuffer = TRUE; + buffer->data = ptr; + + return &buffer->base; +} + + +/** + * Compute size (in bytes) need to store a texture image / mipmap level, + * for just one cube face or one 3D texture slice + */ +static unsigned +tex_image_face_size(const struct llvmpipe_resource *lpr, unsigned level, + enum lp_texture_layout layout) +{ + const unsigned width = u_minify(lpr->base.width0, level); + const unsigned height = u_minify(lpr->base.height0, level); + + assert(layout == LP_TEX_LAYOUT_TILED || + layout == LP_TEX_LAYOUT_LINEAR); + + if (layout == LP_TEX_LAYOUT_TILED) { + /* for tiled layout, force a 32bpp format */ + const enum pipe_format format = PIPE_FORMAT_B8G8R8A8_UNORM; + const unsigned block_size = util_format_get_blocksize(format); + const unsigned nblocksy = + util_format_get_nblocksy(format, align(height, TILE_SIZE)); + const unsigned nblocksx = + util_format_get_nblocksx(format, align(width, TILE_SIZE)); + const unsigned buffer_size = block_size * nblocksy * nblocksx; + return buffer_size; + } + else { + const enum pipe_format format = lpr->base.format; + const unsigned nblocksy = + util_format_get_nblocksy(format, align(height, TILE_SIZE)); + const unsigned buffer_size = nblocksy * lpr->row_stride[level]; + return buffer_size; + } +} + + +/** + * Compute size (in bytes) need to store a texture image / mipmap level, + * including all cube faces or 3D image slices + */ +static unsigned +tex_image_size(const struct llvmpipe_resource *lpr, unsigned level, + enum lp_texture_layout layout) +{ + const unsigned buf_size = tex_image_face_size(lpr, level, layout); + return buf_size * lpr->num_slices_faces[level]; +} + + +/** + * This function encapsulates some complicated logic for determining + * how to convert a tile of image data from linear layout to tiled 
+ * layout, or vice versa. + * \param cur_layout the current tile layout + * \param target_layout the desired tile layout + * \param usage how the tile will be accessed (R/W vs. read-only, etc) + * \param new_layout_return returns the new layout mode + * \param convert returns TRUE if image conversion is needed + */ +static void +layout_logic(enum lp_texture_layout cur_layout, + enum lp_texture_layout target_layout, + enum lp_texture_usage usage, + enum lp_texture_layout *new_layout_return, + boolean *convert) +{ + enum lp_texture_layout other_layout, new_layout; + + *convert = FALSE; + + new_layout = 99; /* debug check */ + + if (target_layout == LP_TEX_LAYOUT_LINEAR) { + other_layout = LP_TEX_LAYOUT_TILED; + } + else { + assert(target_layout == LP_TEX_LAYOUT_TILED); + other_layout = LP_TEX_LAYOUT_LINEAR; + } + + new_layout = target_layout; /* may get changed below */ + + if (cur_layout == LP_TEX_LAYOUT_BOTH) { + if (usage == LP_TEX_USAGE_READ) { + new_layout = LP_TEX_LAYOUT_BOTH; + } + } + else if (cur_layout == other_layout) { + if (usage != LP_TEX_USAGE_WRITE_ALL) { + /* need to convert tiled data to linear or vice versa */ + *convert = TRUE; + + if (usage == LP_TEX_USAGE_READ) + new_layout = LP_TEX_LAYOUT_BOTH; + } + } + else { + assert(cur_layout == LP_TEX_LAYOUT_NONE || + cur_layout == target_layout); + } + + assert(new_layout == LP_TEX_LAYOUT_BOTH || + new_layout == target_layout); + + *new_layout_return = new_layout; +} + + +/** + * Return pointer to a 2D texture image/face/slice. + * No tiled/linear conversion is done.
+ */ +ubyte * +llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + enum lp_texture_layout layout) +{ + struct llvmpipe_texture_image *img; + unsigned offset; + + if (layout == LP_TEX_LAYOUT_LINEAR) { + img = &lpr->linear[level]; + } + else { + assert (layout == LP_TEX_LAYOUT_TILED); + img = &lpr->tiled[level]; + } + + if (face_slice > 0) + offset = face_slice * tex_image_face_size(lpr, level, layout); + else + offset = 0; + + return (ubyte *) img->data + offset; +} + + +static INLINE enum lp_texture_layout +llvmpipe_get_texture_tile_layout(const struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y) +{ + uint i; + assert(resource_is_texture(&lpr->base)); + assert(x < lpr->tiles_per_row[level]); + i = face_slice * lpr->tiles_per_image[level] + + y * lpr->tiles_per_row[level] + x; + return lpr->layout[level][i]; +} + + +static INLINE void +llvmpipe_set_texture_tile_layout(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y, + enum lp_texture_layout layout) +{ + uint i; + assert(resource_is_texture(&lpr->base)); + assert(x < lpr->tiles_per_row[level]); + i = face_slice * lpr->tiles_per_image[level] + + y * lpr->tiles_per_row[level] + x; + lpr->layout[level][i] = layout; +} + + +/** + * Set the layout mode for all tiles in a particular image. + */ +static INLINE void +llvmpipe_set_texture_image_layout(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned width_t, unsigned height_t, + enum lp_texture_layout layout) +{ + const unsigned start = face_slice * lpr->tiles_per_image[level]; + unsigned i; + + for (i = 0; i < width_t * height_t; i++) { + lpr->layout[level][start + i] = layout; + } +} + + +/** + * Allocate storage for a linear or tile texture image (all cube + * faces and all 3D slices).
+ */ +static void +alloc_image_data(struct llvmpipe_resource *lpr, unsigned level, + enum lp_texture_layout layout) +{ + if (lpr->dt) + assert(level == 0); + + if (layout == LP_TEX_LAYOUT_TILED) { + /* tiled data is stored in regular memory */ + uint buffer_size = tex_image_size(lpr, level, layout); + lpr->tiled[level].data = align_malloc(buffer_size, 16); + } + else { + assert(layout == LP_TEX_LAYOUT_LINEAR); + if (lpr->dt) { + /* we get the linear memory from the winsys */ + struct llvmpipe_screen *screen = llvmpipe_screen(lpr->base.screen); + struct sw_winsys *winsys = screen->winsys; + + lpr->linear[0].data = + winsys->displaytarget_map(winsys, lpr->dt, + PIPE_TRANSFER_READ_WRITE); + } + else { + /* not a display target - allocate regular memory */ + uint buffer_size = tex_image_size(lpr, level, LP_TEX_LAYOUT_LINEAR); + lpr->linear[level].data = align_malloc(buffer_size, 16); + } + } +} + + + +/** + * Return pointer to texture image data (either linear or tiled layout) + * for a particular cube face or 3D texture slice. + * + * \param face_slice the cube face or 3D slice of interest + * \param usage one of LP_TEX_USAGE_READ/WRITE_ALL/READ_WRITE + * \param layout either LP_TEX_LAYOUT_LINEAR or _TILED or _NONE + */ +void * +llvmpipe_get_texture_image(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + enum lp_texture_usage usage, + enum lp_texture_layout layout) +{ + /* + * 'target' refers to the image which we're retrieving (either in + * tiled or linear layout). + * 'other' refers to the same image but in the other layout. (it may + * or may not exist.)
+ */ + struct llvmpipe_texture_image *target_img; + struct llvmpipe_texture_image *other_img; + void *target_data; + void *other_data; + const unsigned width = u_minify(lpr->base.width0, level); + const unsigned height = u_minify(lpr->base.height0, level); + const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE; + const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE; + enum lp_texture_layout other_layout; + boolean only_allocate; + + assert(layout == LP_TEX_LAYOUT_NONE || + layout == LP_TEX_LAYOUT_TILED || + layout == LP_TEX_LAYOUT_LINEAR); + + assert(usage == LP_TEX_USAGE_READ || + usage == LP_TEX_USAGE_READ_WRITE || + usage == LP_TEX_USAGE_WRITE_ALL); + + /* check for the special case of layout == LP_TEX_LAYOUT_NONE */ + if (layout == LP_TEX_LAYOUT_NONE) { + only_allocate = TRUE; + layout = LP_TEX_LAYOUT_TILED; + } + else { + only_allocate = FALSE; + } + + if (lpr->dt) { + assert(lpr->linear[level].data); + } + + /* which is target? which is other? */ + if (layout == LP_TEX_LAYOUT_LINEAR) { + target_img = &lpr->linear[level]; + other_img = &lpr->tiled[level]; + other_layout = LP_TEX_LAYOUT_TILED; + } + else { + target_img = &lpr->tiled[level]; + other_img = &lpr->linear[level]; + other_layout = LP_TEX_LAYOUT_LINEAR; + } + + target_data = target_img->data; + other_data = other_img->data; + + if (!target_data) { + /* allocate memory for the target image now */ + alloc_image_data(lpr, level, layout); + target_data = target_img->data; + } + + if (face_slice > 0) { + unsigned target_offset, other_offset; + + target_offset = face_slice * tex_image_face_size(lpr, level, layout); + other_offset = face_slice * tex_image_face_size(lpr, level, other_layout); + if (target_data) { + target_data = (uint8_t *) target_data + target_offset; + } + if (other_data) { + other_data = (uint8_t *) other_data + other_offset; + } + } + + if (only_allocate) { + /* Just allocating tiled memory. Don't initialize it from the + * linear data if it exists. 
+ */ + return target_data; + } + + if (other_data) { + /* may need to convert other data to the requested layout */ + enum lp_texture_layout new_layout; + unsigned x, y; + + /* loop over all image tiles, doing layout conversion where needed */ + for (y = 0; y < height_t; y++) { + for (x = 0; x < width_t; x++) { + enum lp_texture_layout cur_layout = + llvmpipe_get_texture_tile_layout(lpr, face_slice, level, x, y); + boolean convert; + + layout_logic(cur_layout, layout, usage, &new_layout, &convert); + + if (convert) { + if (layout == LP_TEX_LAYOUT_TILED) { + lp_linear_to_tiled(other_data, target_data, + x * TILE_SIZE, y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + lpr->base.format, + lpr->row_stride[level], + lpr->tiles_per_row[level]); + } + else { + lp_tiled_to_linear(other_data, target_data, + x * TILE_SIZE, y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + lpr->base.format, + lpr->row_stride[level], + lpr->tiles_per_row[level]); + } + } + + llvmpipe_set_texture_tile_layout(lpr, face_slice, level, x, y, + new_layout); + } + } + } + else { + /* no other data */ + llvmpipe_set_texture_image_layout(lpr, face_slice, level, + width_t, height_t, layout); + } + + assert(target_data); + + return target_data; +} + + +/** + * Return pointer to start of a texture image (1D, 2D, 3D, CUBE). + * All cube faces and 3D slices will be converted to the requested + * layout if needed. + * This is typically used when we're about to sample from a texture. + */ +void * +llvmpipe_get_texture_image_all(struct llvmpipe_resource *lpr, + unsigned level, + enum lp_texture_usage usage, + enum lp_texture_layout layout) +{ + const int slices = lpr->num_slices_faces[level]; + int slice; + void *map = NULL; + + assert(slices > 0); + + for (slice = slices - 1; slice >= 0; slice--) { + map = llvmpipe_get_texture_image(lpr, slice, level, usage, layout); + } + + return map; +} + + +/** + * Get pointer to a linear image (not the tile!) where the tile at (x,y) + * is known to be in linear layout. 
+ * Conversion from tiled to linear will be done if necessary. + * \return pointer to start of image/face (not the tile) + */ +ubyte * +llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + enum lp_texture_usage usage, + unsigned x, unsigned y) +{ + struct llvmpipe_texture_image *linear_img = &lpr->linear[level]; + enum lp_texture_layout cur_layout, new_layout; + const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE; + boolean convert; + uint8_t *tiled_image, *linear_image; + + assert(resource_is_texture(&lpr->base)); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + + if (!linear_img->data) { + /* allocate memory for the linear image now */ + alloc_image_data(lpr, level, LP_TEX_LAYOUT_LINEAR); + } + + /* compute address of the slice/face of the image that contains the tile */ + tiled_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, + LP_TEX_LAYOUT_TILED); + linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, + LP_TEX_LAYOUT_LINEAR); + + /* get current tile layout and determine if data conversion is needed */ + cur_layout = llvmpipe_get_texture_tile_layout(lpr, face_slice, level, tx, ty); + + layout_logic(cur_layout, LP_TEX_LAYOUT_LINEAR, usage, + &new_layout, &convert); + + if (convert) { + lp_tiled_to_linear(tiled_image, linear_image, + x, y, TILE_SIZE, TILE_SIZE, lpr->base.format, + lpr->row_stride[level], + lpr->tiles_per_row[level]); } + + if (new_layout != cur_layout) + llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, new_layout); + + return linear_image; +} + + +/** + * Get pointer to tiled data for rendering. 
+ * \return pointer to the tiled data at the given tile position + */ +ubyte * +llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + enum lp_texture_usage usage, + unsigned x, unsigned y) +{ + struct llvmpipe_texture_image *tiled_img = &lpr->tiled[level]; + enum lp_texture_layout cur_layout, new_layout; + const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE; + boolean convert; + uint8_t *tiled_image, *linear_image; + unsigned tile_offset; + + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + + if (!tiled_img->data) { + /* allocate memory for the tiled image now */ + alloc_image_data(lpr, level, LP_TEX_LAYOUT_TILED); + } + + /* compute address of the slice/face of the image that contains the tile */ + tiled_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, + LP_TEX_LAYOUT_TILED); + linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, + LP_TEX_LAYOUT_LINEAR); + + /* get current tile layout and see if we need to convert the data */ + cur_layout = llvmpipe_get_texture_tile_layout(lpr, face_slice, level, tx, ty); + + layout_logic(cur_layout, LP_TEX_LAYOUT_TILED, usage, &new_layout, &convert); + if (convert) { + lp_linear_to_tiled(linear_image, tiled_image, + x, y, TILE_SIZE, TILE_SIZE, lpr->base.format, + lpr->row_stride[level], + lpr->tiles_per_row[level]); + } + + if (new_layout != cur_layout) + llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, new_layout); + + /* compute, return address of the 64x64 tile */ + tile_offset = (ty * lpr->tiles_per_row[level] + tx) + * TILE_SIZE * TILE_SIZE * 4; + + return (ubyte *) tiled_image + tile_offset; } void -llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen) +llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen) { - screen->texture_create = llvmpipe_texture_create; - screen->texture_blanket = llvmpipe_texture_blanket; - screen->texture_destroy = llvmpipe_texture_destroy; + screen->resource_create = 
llvmpipe_resource_create; + screen->resource_destroy = llvmpipe_resource_destroy; + screen->resource_from_handle = llvmpipe_resource_from_handle; + screen->resource_get_handle = llvmpipe_resource_get_handle; + screen->user_buffer_create = llvmpipe_user_buffer_create; screen->get_tex_surface = llvmpipe_get_tex_surface; screen->tex_surface_destroy = llvmpipe_tex_surface_destroy; +} + - screen->get_tex_transfer = llvmpipe_get_tex_transfer; - screen->tex_transfer_destroy = llvmpipe_tex_transfer_destroy; - screen->transfer_map = llvmpipe_transfer_map; - screen->transfer_unmap = llvmpipe_transfer_unmap; +void +llvmpipe_init_context_resource_funcs(struct pipe_context *pipe) +{ + pipe->get_transfer = llvmpipe_get_transfer; + pipe->transfer_destroy = llvmpipe_transfer_destroy; + pipe->transfer_map = llvmpipe_transfer_map; + pipe->transfer_unmap = llvmpipe_transfer_unmap; + pipe->is_resource_referenced = llvmpipe_is_resource_referenced; + + pipe->transfer_flush_region = u_default_transfer_flush_region; + pipe->transfer_inline_write = u_default_transfer_inline_write; } diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 87c905bc02..858975bcee 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -30,33 +30,100 @@ #include "pipe/p_state.h" +#include "util/u_debug.h" + + +#define LP_MAX_TEXTURE_2D_LEVELS 12 /* 2K x 2K for now */ +#define LP_MAX_TEXTURE_3D_LEVELS 10 /* 512 x 512 x 512 for now */ + +#define LP_MAX_TEXTURE_LEVELS LP_MAX_TEXTURE_2D_LEVELS + + +enum lp_texture_usage +{ + LP_TEX_USAGE_READ = 100, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_USAGE_WRITE_ALL +}; + + +/** Per-tile layout mode */ +enum lp_texture_layout +{ + LP_TEX_LAYOUT_NONE = 0, /**< no layout for the tile data yet */ + LP_TEX_LAYOUT_TILED, /**< the tile data is in tiled layout */ + LP_TEX_LAYOUT_LINEAR, /**< the tile data is in linear layout */ + LP_TEX_LAYOUT_BOTH /**< the tile data is in both modes */ 
+}; struct pipe_context; struct pipe_screen; struct llvmpipe_context; -struct llvmpipe_displaytarget; +struct sw_displaytarget; -struct llvmpipe_texture + +/** + * We keep one or two copies of the texture image data: one in a simple + * linear layout (for texture sampling) and another in a tiled layout (for + * render targets). We keep track of whether each image tile is linear + * or tiled on a per-tile basis. + */ + + +/** A 1D/2D/3D image, one mipmap level */ +struct llvmpipe_texture_image +{ + void *data; +}; + + +/** + * llvmpipe subclass of pipe_resource. A texture, drawing surface, + * vertex buffer, const buffer, etc. + * Textures are stored differently than other types of objects such as + * vertex buffers and const buffers. + * The former are tiled and have per-tile layout flags. + * The latter are simple malloc'd blocks of memory. + */ +struct llvmpipe_resource { - struct pipe_texture base; + struct pipe_resource base; - unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned stride[PIPE_MAX_TEXTURE_LEVELS]; + /** Row stride in bytes */ + unsigned row_stride[LP_MAX_TEXTURE_LEVELS]; + /** Image stride (for cube maps or 3D textures) in bytes */ + unsigned img_stride[LP_MAX_TEXTURE_LEVELS]; + unsigned tiles_per_row[LP_MAX_TEXTURE_LEVELS]; + unsigned tiles_per_image[LP_MAX_TEXTURE_LEVELS]; + /** Number of 3D slices or cube faces per level */ + unsigned num_slices_faces[LP_MAX_TEXTURE_LEVELS]; /** - * Display target, for textures with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET + * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET * usage. */ - struct llvmpipe_displaytarget *dt; + struct sw_displaytarget *dt; /** * Malloc'ed data for regular textures, or a mapping to dt above. */ + struct llvmpipe_texture_image tiled[LP_MAX_TEXTURE_LEVELS]; + struct llvmpipe_texture_image linear[LP_MAX_TEXTURE_LEVELS]; + + /** + * Data for non-texture resources.
+ */ void *data; + /** array [level][face or slice][tile_y][tile_x] of layout values) */ + enum lp_texture_layout *layout[LP_MAX_TEXTURE_LEVELS]; + + boolean userBuffer; /** Is this a user-space buffer? */ unsigned timestamp; + + unsigned id; /**< temporary, for debugging */ }; @@ -69,17 +136,17 @@ struct llvmpipe_transfer /** cast wrappers */ -static INLINE struct llvmpipe_texture * -llvmpipe_texture(struct pipe_texture *pt) +static INLINE struct llvmpipe_resource * +llvmpipe_resource(struct pipe_resource *pt) { - return (struct llvmpipe_texture *) pt; + return (struct llvmpipe_resource *) pt; } -static INLINE const struct llvmpipe_texture * -llvmpipe_texture_const(const struct pipe_texture *pt) +static INLINE const struct llvmpipe_resource * +llvmpipe_resource_const(const struct pipe_resource *pt) { - return (const struct llvmpipe_texture *) pt; + return (const struct llvmpipe_resource *) pt; } @@ -90,8 +157,73 @@ llvmpipe_transfer(struct pipe_transfer *pt) } +void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen); +void llvmpipe_init_context_resource_funcs(struct pipe_context *pipe); + +static INLINE unsigned +llvmpipe_resource_stride(struct pipe_resource *resource, + unsigned level) +{ + struct llvmpipe_resource *lpr = llvmpipe_resource(resource); + assert(level < LP_MAX_TEXTURE_2D_LEVELS); + return lpr->row_stride[level]; +} + + +void * +llvmpipe_resource_map(struct pipe_resource *resource, + unsigned face_slice, + unsigned level, + unsigned zslice, + enum lp_texture_usage tex_usage, + enum lp_texture_layout layout); + +void +llvmpipe_resource_unmap(struct pipe_resource *resource, + unsigned face_slice, + unsigned level, + unsigned zslice); + + +void * +llvmpipe_resource_data(struct pipe_resource *resource); + + +ubyte * +llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + enum lp_texture_layout layout); + +void * +llvmpipe_get_texture_image(struct llvmpipe_resource *resource, + unsigned 
face_slice, unsigned level, + enum lp_texture_usage usage, + enum lp_texture_layout layout); + +void * +llvmpipe_get_texture_image_all(struct llvmpipe_resource *lpr, + unsigned level, + enum lp_texture_usage usage, + enum lp_texture_layout layout); + +ubyte * +llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + enum lp_texture_usage usage, + unsigned x, unsigned y); + +ubyte * +llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + enum lp_texture_usage usage, + unsigned x, unsigned y); + + + extern void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen); +extern void +llvmpipe_init_context_texture_funcs(struct pipe_context *pipe); #endif /* LP_TEXTURE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c index c1980b316d..7a2cc3e6b5 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -25,6 +25,14 @@ **************************************************************************/ +/** + * Code to convert images from tiled to linear and back. + * XXX there are quite a few assumptions about color and z/stencil being + * 32bpp. + */ + + +#include "util/u_format.h" #include "lp_tile_soa.h" #include "lp_tile_image.h" @@ -33,33 +41,171 @@ /** + * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout + * at dst, with dst_stride words between rows. 
+ */ +static void +untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride) +{ + uint32_t *d0 = dst; + uint32_t *d1 = d0 + dst_stride; + uint32_t *d2 = d1 + dst_stride; + uint32_t *d3 = d2 + dst_stride; + + d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; + d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; + d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; + d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; +} + + + +/** + * Untile a 4x4 block of 16-bit words (all contiguous) to linear layout + * at dst, with dst_stride words between rows. + */ +static void +untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride) +{ + uint16_t *d0 = dst; + uint16_t *d1 = d0 + dst_stride; + uint16_t *d2 = d1 + dst_stride; + uint16_t *d3 = d2 + dst_stride; + + d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; + d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; + d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; + d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; +} + + + +/** + * Convert a 4x4 rect of 32-bit words from a linear layout into tiled + * layout (in which all 16 words are contiguous). + */ +static void +tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride) +{ + const uint32_t *s0 = src; + const uint32_t *s1 = s0 + src_stride; + const uint32_t *s2 = s1 + src_stride; + const uint32_t *s3 = s2 + src_stride; + + dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; + dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; + dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; + dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; +} + + + +/** + * Convert a 4x4 rect of 16-bit words from a linear layout into tiled + * layout (in which all 16 words are contiguous). 
+ */ +static void +tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride) +{ + const uint16_t *s0 = src; + const uint16_t *s1 = s0 + src_stride; + const uint16_t *s2 = s1 + src_stride; + const uint16_t *s3 = s2 + src_stride; + + dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; + dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; + dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; + dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; +} + + + +/** * Convert a tiled image into a linear image. - * \param src_stride source row stride in bytes (bytes per row of tiles) * \param dst_stride dest row stride in bytes */ void -lp_tiled_to_linear(const uint8_t *src, - uint8_t *dst, +lp_tiled_to_linear(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, enum pipe_format format, - unsigned src_stride, - unsigned dst_stride) + unsigned dst_stride, + unsigned tiles_per_row) { - const unsigned tiles_per_row = src_stride / BYTES_PER_TILE; - unsigned i, j; - - for (j = 0; j < height; j += TILE_SIZE) { - for (i = 0; i < width; i += TILE_SIZE) { - unsigned tile_offset = - ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); - unsigned byte_offset = tile_offset * BYTES_PER_TILE; - const uint8_t *src_tile = src + byte_offset; - - lp_tile_write_4ub(format, - src_tile, - dst, - dst_stride, - i, j, TILE_SIZE, TILE_SIZE); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + /*assert(width % TILE_SIZE == 0); + assert(height % TILE_SIZE == 0);*/ + + /* Note that Z/stencil surfaces use a different tiling size than + * color surfaces. 
+ */ + if (util_format_is_depth_or_stencil(format)) { + const uint bpp = util_format_get_blocksize(format); + const uint src_stride = dst_stride * TILE_VECTOR_WIDTH; + const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT; + const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp); + + dst_stride /= bpp; /* convert from bytes to words */ + + if (bpp == 4) { + const uint32_t *src32 = (const uint32_t *) src; + uint32_t *dst32 = (uint32_t *) dst; + uint i, j; + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 32-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + uint dst_offset = jj * dst_stride + ii; + untile_4_4_uint32(src32 + src_offset, + dst32 + dst_offset, + dst_stride); + } + } + } + else { + const uint16_t *src16 = (const uint16_t *) src; + uint16_t *dst16 = (uint16_t *) dst; + uint i, j; + + assert(bpp == 2); + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 16-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + uint dst_offset = jj * dst_stride + ii; + untile_4_4_uint16(src16 + src_offset, + dst16 + dst_offset, + dst_stride); + } + } + } + } + else { + /* color image */ + const uint bpp = 4; + const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; + const uint bytes_per_tile = tile_w * tile_h * bpp; + uint i, j; + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + uint ii = i + x, jj = j + y; + uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); + uint byte_offset = tile_offset * bytes_per_tile; + const uint8_t *src_tile = (uint8_t *) src + byte_offset; + + lp_tile_write_4ub(format, + src_tile, + dst, dst_stride, + ii, jj, tile_w, tile_h); + } } } } @@ -68,31 +214,87 @@ lp_tiled_to_linear(const uint8_t *src, /** * Convert a 
linear image into a tiled image. * \param src_stride source row stride in bytes - * \param dst_stride dest row stride in bytes (bytes per row of tiles) */ void -lp_linear_to_tiled(const uint8_t *src, - uint8_t *dst, +lp_linear_to_tiled(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, enum pipe_format format, unsigned src_stride, - unsigned dst_stride) + unsigned tiles_per_row) { - const unsigned tiles_per_row = dst_stride / BYTES_PER_TILE; - unsigned i, j; - - for (j = 0; j < height; j += TILE_SIZE) { - for (i = 0; i < width; i += TILE_SIZE) { - unsigned tile_offset = - ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); - unsigned byte_offset = tile_offset * BYTES_PER_TILE; - uint8_t *dst_tile = dst + byte_offset; - - lp_tile_read_4ub(format, - dst_tile, - src, - src_stride, - i, j, TILE_SIZE, TILE_SIZE); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + /* + assert(width % TILE_SIZE == 0); + assert(height % TILE_SIZE == 0); + */ + + if (util_format_is_depth_or_stencil(format)) { + const uint bpp = util_format_get_blocksize(format); + const uint dst_stride = src_stride * TILE_VECTOR_WIDTH; + const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT; + const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp); + + src_stride /= bpp; /* convert from bytes to words */ + + if (bpp == 4) { + const uint32_t *src32 = (const uint32_t *) src; + uint32_t *dst32 = (uint32_t *) dst; + uint i, j; + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 32-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = jj * src_stride + ii; + uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + tile_4_4_uint32(src32 + src_offset, + dst32 + dst_offset, + src_stride); + } + } + } + else { + const uint16_t *src16 = (const uint16_t *) src; + uint16_t *dst16 = (uint16_t *) dst; + uint i, j; + + assert(bpp == 2); + + for (j = 0; j < 
height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 16-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = jj * src_stride + ii; + uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + tile_4_4_uint16(src16 + src_offset, + dst16 + dst_offset, + src_stride); + } + } + } + } + else { + const uint bpp = 4; + const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; + const uint bytes_per_tile = tile_w * tile_h * bpp; + uint i, j; + + for (j = 0; j < height; j += TILE_SIZE) { + for (i = 0; i < width; i += TILE_SIZE) { + uint ii = i + x, jj = j + y; + uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); + uint byte_offset = tile_offset * bytes_per_tile; + uint8_t *dst_tile = (uint8_t *) dst + byte_offset; + + lp_tile_read_4ub(format, + dst_tile, + src, src_stride, + ii, jj, tile_w, tile_h); + } } } } @@ -102,7 +304,7 @@ lp_linear_to_tiled(const uint8_t *src, * For testing only. */ void -test_tiled_linear_conversion(uint8_t *data, +test_tiled_linear_conversion(void *data, enum pipe_format format, unsigned width, unsigned height, unsigned stride) @@ -113,13 +315,13 @@ test_tiled_linear_conversion(uint8_t *data, uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4); - unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4; + /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/ - lp_linear_to_tiled(data, tiled, width, height, format, - stride, tiled_stride); + lp_linear_to_tiled(data, tiled, 0, 0, width, height, format, + stride, wt); - lp_tiled_to_linear(tiled, data, width, height, format, - tiled_stride, stride); + lp_tiled_to_linear(tiled, data, 0, 0, width, height, format, + stride, wt); free(tiled); } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.h b/src/gallium/drivers/llvmpipe/lp_tile_image.h index 60d472e8c5..8de8efc6c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h @@ -30,25 +30,25 @@ void 
-lp_tiled_to_linear(const uint8_t *src, - uint8_t *dst, +lp_tiled_to_linear(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, enum pipe_format format, - unsigned src_stride, - unsigned dst_stride); + unsigned dst_stride, + unsigned tiles_per_row); void -lp_linear_to_tiled(const uint8_t *src, - uint8_t *dst, +lp_linear_to_tiled(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, enum pipe_format format, unsigned src_stride, - unsigned dst_stride); + unsigned tiles_per_row); void -test_tiled_linear_conversion(uint8_t *data, +test_tiled_linear_conversion(void *data, enum pipe_format format, unsigned width, unsigned height, unsigned stride); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index eea3ab8499..9d6a88afec 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -50,11 +50,26 @@ tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; #define TILE_X_STRIDE (NUM_CHANNELS * TILE_C_STRIDE) //64 #define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) //1024 -#define TILE_PIXEL(_p, _x, _y, _c) \ - ((_p)[((_y) / TILE_VECTOR_HEIGHT) * TILE_Y_STRIDE + \ - ((_x) / TILE_VECTOR_WIDTH) * TILE_X_STRIDE + \ - (_c) * TILE_C_STRIDE + \ - tile_offset[(_y) % TILE_VECTOR_HEIGHT][(_x) % TILE_VECTOR_WIDTH]]) + +extern int tile_write_count, tile_read_count; + + +/** + * Return offset of the given pixel (and color channel) from the start + * of a tile, in bytes. 
+ */ +static INLINE unsigned +tile_pixel_offset(unsigned x, unsigned y, unsigned c) +{ + unsigned ix = (x / TILE_VECTOR_WIDTH) * TILE_X_STRIDE; + unsigned iy = (y / TILE_VECTOR_HEIGHT) * TILE_Y_STRIDE; + unsigned offset = iy + ix + c * TILE_C_STRIDE + + tile_offset[y % TILE_VECTOR_HEIGHT][x % TILE_VECTOR_WIDTH]; + return offset; +} + + +#define TILE_PIXEL(_p, _x, _y, _c) ((_p)[tile_pixel_offset(_x, _y, _c)]) void diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 00b8d4fc38..65810b6f8f 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -42,7 +42,29 @@ import os.path sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/util')) -from u_format_access import * +from u_format_pack import * + + +def is_format_supported(format): + '''Determines whether we actually have the plumbing necessary to generate the + to read/write to/from this format.''' + + # FIXME: Ideally we would support any format combination here. 
+ + if format.layout != PLAIN: + return False + + for i in range(4): + channel = format.channels[i] + if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT): + return False + if channel.type == FLOAT and channel.size not in (32 ,64): + return False + + if format.colorspace not in ('rgb', 'srgb'): + return False + + return True def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): @@ -62,7 +84,7 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): print ' for (x = 0; x < w; ++x) {' names = ['']*4 - if format.colorspace == 'rgb': + if format.colorspace in ('rgb', 'srgb'): for i in range(4): swizzle = format.swizzles[i] if swizzle < 4: @@ -95,16 +117,21 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): shift += width else: for i in range(4): + if names[i]: + print ' %s %s;' % (dst_native_type, names[i]) + for i in range(4): src_channel = format.channels[i] if names[i]: value = '(*src_pixel++)' value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) - print ' %s %s = %s;' % (dst_native_type, names[i], value) + print ' %s = %s;' % (names[i], value) + elif src_channel.size: + print ' ++src_pixel;' else: assert False for i in range(4): - if format.colorspace == 'rgb': + if format.colorspace in ('rgb', 'srgb'): swizzle = format.swizzles[i] if swizzle < 4: value = names[swizzle] @@ -134,7 +161,7 @@ def pack_rgba(format, src_channel, r, g, b, a): """Return an expression for packing r, g, b, a into a pixel of the given format. 
Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)' """ - assert format.colorspace == 'rgb' + assert format.colorspace in ('rgb', 'srgb') inv_swizzle = format.inv_swizzles() shift = 0 expr = None @@ -230,6 +257,8 @@ def emit_tile_pixel_write_code(format, src_channel): value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) print ' *dst_pixel++ = %s;' % value + else: + print ' ++dst_pixel;' else: assert False @@ -251,7 +280,8 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): and format.block_size() <= 32 \ and format.is_pot() \ and not format.is_mixed() \ - and format.channels[0].type == UNSIGNED: + and (format.channels[0].type == UNSIGNED \ + or format.channels[1].type == UNSIGNED): emit_unrolled_write_code(format, src_channel) else: emit_tile_pixel_write_code(format, src_channel) @@ -270,6 +300,7 @@ def generate_read(formats, dst_channel, dst_native_type, dst_suffix): print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) print '{' print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type + print ' tile_read_count += 1;' print ' switch(format) {' for format in formats: if is_format_supported(format): @@ -277,7 +308,7 @@ def generate_read(formats, dst_channel, dst_native_type, dst_suffix): print ' func = &lp_tile_%s_read_%s;' % (format.short_name(), dst_suffix) print ' break;' print ' default:' - print ' debug_printf("unsupported format\\n");' + print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' print ' return;' print ' }' print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);' @@ -297,6 +328,7 @@ def generate_write(formats, src_channel, src_native_type, src_suffix): print '{' print ' void 
(*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type + print ' tile_write_count += 1;' print ' switch(format) {' for format in formats: if is_format_supported(format): @@ -304,7 +336,7 @@ def generate_write(formats, src_channel, src_native_type, src_suffix): print ' func = &lp_tile_%s_write_%s;' % (format.short_name(), src_suffix) print ' break;' print ' default:' - print ' debug_printf("unsupported format\\n");' + print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' print ' return;' print ' }' print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);' @@ -325,8 +357,11 @@ def main(): print '#include "pipe/p_compiler.h"' print '#include "util/u_format.h"' print '#include "util/u_math.h"' + print '#include "util/u_half.h"' print '#include "lp_tile_soa.h"' print + print 'int tile_write_count=0, tile_read_count=0;' + print print 'const unsigned char' print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' print ' { 0, 1, 4, 5},' @@ -349,8 +384,6 @@ def main(): print '};' print - generate_clamp() - channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' suffix = '4ub' diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h deleted file mode 100644 index ce11fa9304..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_winsys.h +++ /dev/null @@ -1,125 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * llvmpipe public interface. - */ - - -#ifndef LP_WINSYS_H -#define LP_WINSYS_H - - -#include "pipe/p_compiler.h" /* for boolean */ -#include "pipe/p_format.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -struct pipe_screen; -struct pipe_context; - - -/** - * Opaque pointer. - */ -struct llvmpipe_displaytarget; - - -/** - * This is the interface that llvmpipe expects any window system - * hosting it to implement. - * - * llvmpipe is for the most part a self sufficient driver. The only thing it - * does not know is how to display a surface. 
- */ -struct llvmpipe_winsys -{ - void - (*destroy)( struct llvmpipe_winsys *ws ); - - boolean - (*is_displaytarget_format_supported)( struct llvmpipe_winsys *ws, - enum pipe_format format ); - - /** - * Allocate storage for a render target. - * - * Often surfaces which are meant to be blitted to the front screen (i.e., - * display targets) must be allocated with special characteristics, memory - * pools, or obtained directly from the windowing system. - * - * This callback is invoked by the pipe_screen when creating a texture marked - * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying - * storage. - */ - struct llvmpipe_displaytarget * - (*displaytarget_create)( struct llvmpipe_winsys *ws, - enum pipe_format format, - unsigned width, unsigned height, - unsigned alignment, - unsigned *stride ); - - void * - (*displaytarget_map)( struct llvmpipe_winsys *ws, - struct llvmpipe_displaytarget *dt, - unsigned flags ); - - void - (*displaytarget_unmap)( struct llvmpipe_winsys *ws, - struct llvmpipe_displaytarget *dt ); - - /** - * @sa pipe_screen:flush_frontbuffer. - * - * This call will likely become asynchronous eventually. - */ - void - (*displaytarget_display)( struct llvmpipe_winsys *ws, - struct llvmpipe_displaytarget *dt, - void *context_private ); - - void - (*displaytarget_destroy)( struct llvmpipe_winsys *ws, - struct llvmpipe_displaytarget *dt ); -}; - - - -struct pipe_screen * -llvmpipe_create_screen( struct llvmpipe_winsys * ); - - -#ifdef __cplusplus -} -#endif - -#endif /* LP_WINSYS_H */ |