From 8be72bb7646d430e66cb36e09c13c13bee030d53 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 6 Sep 2009 11:20:14 +0100 Subject: llvmpipe: Further abstract the texture sampling generation from TGSI translation. --- src/gallium/drivers/llvmpipe/lp_bld_tgsi.h | 35 +++++-- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 16 +-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 115 ++------------------- src/gallium/drivers/llvmpipe/lp_tex_sample.c | 133 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_tex_sample.h | 12 +++ 5 files changed, 186 insertions(+), 125 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 912db24aec..10c251c416 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -44,14 +44,30 @@ struct lp_build_context; struct lp_build_mask_context; -typedef void -(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel); +/** + * Sampler code generation interface. + * + * Although texture sampling is a requirement for TGSI translation, it is + * a very different problem with several different approaches to it. This + * structure establishes an interface for texture sampling code generation, so + * that we can easily use different texture sampling strategies. + */ +struct lp_build_sampler_soa +{ + void + (*destroy)( struct lp_build_sampler_soa *sampler ); + + void + (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); +}; + void lp_build_tgsi_soa(LLVMBuilderRef builder, @@ -62,8 +78,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context); + struct lp_build_sampler_soa *sampler); #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index d4d18febec..3ce379de12 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -88,8 +88,7 @@ struct lp_build_tgsi_soa_context const LLVMValueRef (*inputs)[NUM_CHANNELS]; LLVMValueRef (*outputs)[NUM_CHANNELS]; - lp_emit_fetch_texel_soa_callback emit_fetch_texel; - void *emit_fetch_texel_context; + struct lp_build_sampler_soa *sampler; LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; @@ -289,8 +288,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, coords[i] = lp_build_mul(&bld->base, coords[i], oow); } - bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context, - unit, num_coords, coords, lodbias, texel); + bld->sampler->emit_fetch_texel(bld->sampler, + bld->base.builder, + bld->base.type, + unit, num_coords, coords, lodbias, + texel); FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) { emit_store( bld, inst, 0, i, texel[i] ); @@ -1283,8 +1285,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context) + struct lp_build_sampler_soa *sampler) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; @@ -1299,8 +1300,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.inputs = inputs; bld.outputs = outputs; bld.consts_ptr = consts_ptr; - bld.emit_fetch_texel = emit_fetch_texel; - bld.emit_fetch_texel_context = emit_fetch_texel_context; + bld.sampler = sampler; tgsi_parse_init( &parse, tokens ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 94170bd716..80f705c871 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -85,6 +85,7 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_quad.h" +#include "lp_tex_sample.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -173,107 +174,6 @@ generate_depth(struct llvmpipe_context *lp, } -struct build_fetch_texel_context -{ - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -static void -emit_fetch_texel( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct build_fetch_texel_context *bld = context; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!bld->samplers_ptr) - bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr); - - if(!bld->store_ptr) - bld->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = bld->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = bld->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - /** * Generate the fragment shader, depth/stencil test, and alpha tests. */ @@ -286,7 +186,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef context_ptr, unsigned i, const struct lp_build_interp_soa_context *interp, - struct build_fetch_texel_context *sampler, + struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef *color, LLVMValueRef depth_ptr) @@ -327,7 +227,7 @@ generate_fs(struct llvmpipe_context *lp, lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, emit_fetch_texel, sampler); + outputs, sampler); for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -462,7 +362,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; - struct build_fetch_texel_context sampler; + struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; @@ -586,8 +486,7 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); - memset(&sampler, 0, sizeof sampler); - sampler.context_ptr = context_ptr; + sampler = lp_c_sampler_soa_create(context_ptr); for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -606,7 +505,7 @@ generate_fragment(struct llvmpipe_context *lp, context_ptr, i, &interp, - &sampler, + sampler, &fs_mask[i], out_color, depth_ptr_i); @@ -615,6 +514,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_out_color[chan][i] = out_color[chan]; } + sampler->destroy(sampler); + /* * Convert the fs's output color and mask to fit to the blending type. */ diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index 94eb6dad5a..9a876f404d 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -1578,3 +1578,136 @@ out: tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); } + +void PIPE_CDECL +lp_fetch_texel_soa( struct tgsi_sampler **samplers, + uint32_t unit, + float *store ) +{ + struct tgsi_sampler *sampler = samplers[unit]; + +#if 0 + uint j; + + debug_printf("%s sampler: %p (%p) store: %p\n", + __FUNCTION__, + sampler, *sampler, + store ); + + debug_printf("lodbias %f\n", store[12]); + + for (j = 0; j < 4; j++) + debug_printf("sample %d texcoord %f %f\n", + j, + store[0+j], + store[4+j]); +#endif + + { + float rgba[NUM_CHANNELS][QUAD_SIZE]; + sampler->get_samples(sampler, + &store[0], + &store[4], + &store[8], + 0.0f, /*store[12], lodbias */ + rgba); + memcpy(store, rgba, sizeof rgba); + } + +#if 0 + for (j = 0; j < 4; j++) + debug_printf("sample %d result %f %f %f %f\n", + j, + store[0+j], + store[4+j], + store[8+j], + store[12+j]); +#endif +} + + +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_tgsi.h" + + +struct lp_c_sampler_soa +{ + struct lp_build_sampler_soa base; + + LLVMValueRef context_ptr; + + LLVMValueRef samplers_ptr; + + /** Coords/texels store */ + LLVMValueRef store_ptr; +}; + + +static void +lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; + LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); + LLVMValueRef args[3]; + unsigned i; + + if(!sampler->samplers_ptr) + sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); + + if(!sampler->store_ptr) + sampler->store_ptr = LLVMBuildArrayAlloca(builder, + vec_type, + LLVMConstInt(LLVMInt32Type(), 4, 0), + "texel_store"); + + for (i = 0; i < num_coords; i++) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + LLVMBuildStore(builder, coords[i], coord_ptr); + } + + args[0] = sampler->samplers_ptr; + args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); + args[2] = sampler->store_ptr; + + lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); + } +} + + +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr) +{ + struct lp_c_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_c_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_c_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; + sampler->context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 628ec3f1ef..7d1e565885 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -29,6 +29,8 @@ #define LP_TEX_SAMPLE_H +#include + #include "tgsi/tgsi_exec.h" @@ -75,4 +77,14 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE]); +/** + * Texture sampling code generator that just calls lp_get_samples C function + * for the actual sampling computation. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr); + + #endif /* LP_TEX_SAMPLE_H */ -- cgit v1.2.3 From 866fbacf2bf93282f622f1f455250491d0b3b63f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 7 Sep 2009 14:24:31 +0100 Subject: llvmpipe: SoA pixel unpacking specialization. --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_bld_format.h | 62 +++++--- src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 30 ++-- src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 193 +++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_test_format.c | 4 +- 6 files changed, 252 insertions(+), 39 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 6e63a0c2b7..54bb3a0e73 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -15,6 +15,7 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5c29bdac56..3dcd5cb994 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -23,6 +23,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', 'lp_bld_struct.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 01c8a752d1..5ee0656093 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -31,20 +31,14 @@ /** * @file - * LLVM IR building helpers interfaces. - * - * We use LLVM-C bindings for now. They are not documented, but follow the C++ - * interfaces very closely, and appear to be complete enough for code - * genration. See - * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - * for a standalone example. + * Pixel format helpers. */ #include - -#include "pipe/p_format.h" +#include "pipe/p_format.h" +struct util_format_description; union lp_type; @@ -56,9 +50,9 @@ union lp_type; * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed); /** @@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba); /** @@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr); /** @@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba); + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba); #endif /* !LP_BLD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index dcbc0076c7..b9b5d84bed 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -32,9 +32,9 @@ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed) +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed) { const struct util_format_description *desc; LLVMTypeRef type; @@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba) +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr) { const struct util_format_description *desc; LLVMTypeRef type; @@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder, packed = LLVMBuildLoad(builder, ptr, ""); - return lp_build_unpack_rgba(builder, format, packed); + return lp_build_unpack_rgba_aos(builder, format, packed); } void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder, type = LLVMIntType(desc->block.bits); - packed = lp_build_pack_rgba(builder, format, rgba); + packed = lp_build_pack_rgba_aos(builder, format, rgba); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c new file mode 100644 index 0000000000..36bac06d2e --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -0,0 +1,193 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_format.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_format.h" + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef inputs[4]; + unsigned start; + unsigned chan; + + /* FIXME: Support more formats */ + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + /* Decode the input vector components */ + start = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned width = format_desc->channel[chan].size; + unsigned stop = start + width; + LLVMValueRef input; + + input = packed; + + switch(format_desc->channel[chan].type) { + case UTIL_FORMAT_TYPE_VOID: + input = NULL; + break; + + case UTIL_FORMAT_TYPE_UNSIGNED: + if(type.floating) { + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); + if(stop < format_desc->block.bits) { + unsigned mask = ((unsigned long long)1 << width) - 1; + input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); + } + + if(format_desc->channel[chan].normalized) + input = lp_build_unsigned_norm_to_float(builder, width, type, input); + else + input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), ""); + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + break; + + default: + /* fall through */ + input = lp_build_undef(type); + break; + } + + inputs[chan] = input; + + start = stop; + } + + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + rgba[chan] = inputs[swizzle]; + break; + case UTIL_FORMAT_SWIZZLE_0: + rgba[chan] = lp_build_zero(type); + break; + case UTIL_FORMAT_SWIZZLE_1: + rgba[chan] = lp_build_one(type); + break; + case UTIL_FORMAT_SWIZZLE_NONE: + rgba[chan] = lp_build_undef(type); + break; + } + } +} + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba) +{ + LLVMValueRef packed; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + packed = lp_build_gather(builder, + type.length, format_desc->block.bits, type.width, + base_ptr, offsets); + + lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); +} diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 1d192355ee..d8455e5649 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module, lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop); - rgba = lp_build_load_rgba(builder, format, ptr); + rgba = lp_build_load_rgba_aos(builder, format, ptr); LLVMBuildStore(builder, rgba, rgba_ptr); lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop); @@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba(builder, format, ptr, rgba); + lp_build_store_rgba_aos(builder, format, ptr, rgba); LLVMBuildRetVoid(builder); -- cgit v1.2.3 From b1eff018c7a07502c673032ef6426f49364146be Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 7 Sep 2009 14:25:02 +0100 Subject: llvmpipe: Utility function to get the pointer to a structure member. --- src/gallium/drivers/llvmpipe/lp_bld_struct.c | 21 +++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_bld_struct.h | 12 ++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c index 14d2b10df9..3998ac374f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c @@ -41,18 +41,31 @@ #include "lp_bld_struct.h" +LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name) +{ + LLVMValueRef indices[2]; + LLVMValueRef member_ptr; + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name); + return member_ptr; +} + + LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, unsigned member, const char *name) { - LLVMValueRef indices[2]; LLVMValueRef member_ptr; LLVMValueRef res; - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); res = LLVMBuildLoad(builder, member_ptr, ""); lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); return res; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h index cbefdc9f81..740392f561 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h @@ -53,6 +53,18 @@ offsetof(_ctype, _cmember)) +/** + * Get value pointer to a structure member. + */ +LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name); + +/** + * Get the value of a structure member. + */ LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, -- cgit v1.2.3 From 4da20234f3ea9b1606522fd0a9f4ef5c4b903906 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 7 Sep 2009 14:26:30 +0100 Subject: llvmpipe: Correct implementation of floor. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 83 ++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_arit.h | 12 +++++ 2 files changed, 95 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 09a57ff33d..eb9e579bdd 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -587,6 +587,89 @@ lp_build_abs(struct lp_build_context *bld, } +enum lp_build_round_sse41_mode +{ + LP_BUILD_ROUND_SSE41_NEAREST = 0, + LP_BUILD_ROUND_SSE41_FLOOR = 1, + LP_BUILD_ROUND_SSE41_CEIL = 2, + LP_BUILD_ROUND_SSE41_TRUNCATE = 3 +}; + + +static INLINE LLVMValueRef +lp_build_round_sse41(struct lp_build_context *bld, + LLVMValueRef a, + enum lp_build_round_sse41_mode mode) +{ + const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + const char *intrinsic; + + assert(type.floating); + assert(type.width*type.length == 128); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ps"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.pd"; + break; + default: + assert(0); + return bld->undef; + } + + return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, + LLVMConstInt(LLVMInt32Type(), mode, 0)); +} + + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); +} + + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) +{ + a = lp_build_floor(bld, a); + a = lp_build_int(bld, a); + return a; +} + + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index fc8cb25966..0732b9fa75 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -85,6 +85,18 @@ LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a); -- cgit v1.2.3 From fa0f4b35be17f68667edd6a2757b89086a11a833 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 7 Sep 2009 14:27:06 +0100 Subject: llvmpipe: Utility functions for linear and bilinear interpolation. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 25 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_arit.h | 20 ++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index eb9e579bdd..a1d8a89774 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld, } +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) +{ + return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); +} + + +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11) +{ + LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); + LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); + return lp_build_lerp(bld, y, v0, v1); +} + + /** * Generate min(a, b) * Do checks for special cases. diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 0732b9fa75..383c3c3313 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -71,6 +71,26 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1); + +/** + * Bilinear interpolation. + * + * Values indices are in v_{yx}. + */ +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11); + LLVMValueRef lp_build_min(struct lp_build_context *bld, LLVMValueRef a, -- cgit v1.2.3 From 0c2ea2433833d5eda8a4fefe1412bf0ea40b14bf Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 7 Sep 2009 14:42:57 +0100 Subject: llvmpipe: Convenience function to obtain the integer type with same bitdepth of an arbitrary type. --- src/gallium/drivers/llvmpipe/lp_bld_type.c | 11 +++++++++++ src/gallium/drivers/llvmpipe/lp_bld_type.h | 4 ++++ 2 files changed, 15 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 8e0026fd97..577644b7ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -157,6 +157,17 @@ lp_build_int_vec_type(union lp_type type) } +union lp_type +lp_int_type(union lp_type type) +{ + union lp_type int_type; + int_type.value = 0; + int_type.width = type.width; + int_type.length = type.length; + return int_type; +} + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 3ce566be64..9933e0b45c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -165,6 +165,10 @@ LLVMTypeRef lp_build_int_vec_type(union lp_type type); +union lp_type +lp_int_type(union lp_type type); + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, -- cgit v1.2.3 From de8376e2f22a59a0bc18bb7ddab88ee3153678b8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 7 Sep 2009 14:43:51 +0100 Subject: llvmpipe: Texture sampling code generation primitives. Only supports single level 2d textures, with neareast and bilinear filtering for now. --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_bld_sample.h | 134 +++++++++ src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 342 +++++++++++++++++++++++ 4 files changed, 478 insertions(+) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample.h create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 54bb3a0e73..c0033dea34 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -19,6 +19,7 @@ C_SOURCES = \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ + lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ lp_bld_tgsi_soa.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 3dcd5cb994..b74c9c4b6e 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -26,6 +26,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', 'lp_bld_swizzle.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h new file mode 100644 index 0000000000..5798f191fe --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -0,0 +1,134 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca + */ + +#ifndef LP_BLD_SAMPLE_H +#define LP_BLD_SAMPLE_H + + +#include + +struct pipe_texture; +struct pipe_sampler_state; +union lp_type; + + +/** + * Sampler static state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are embedded in the generated code. + */ +struct lp_sampler_static_state +{ + /* pipe_texture's state */ + enum pipe_format format; + unsigned pot_width:1; + unsigned pot_height:1; + unsigned pot_depth:1; + + /* pipe_sampler_state's state */ + unsigned wrap_s:3; + unsigned wrap_t:3; + unsigned wrap_r:3; + unsigned min_img_filter:2; + unsigned min_mip_filter:2; + unsigned mag_img_filter:2; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned normalized_coords:1; + unsigned prefilter:4; +}; + + +/** + * Sampler dynamic state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are computed in runtime. + * + * There are obtained through callbacks, as we don't want to tie the texture + * sampling code generation logic to any particular texture layout or pipe + * driver. + */ +struct lp_sampler_dynamic_state +{ + + /** Obtain the base texture width. */ + LLVMValueRef + (*width)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + /** Obtain the base texture height. */ + LLVMValueRef + (*height)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*data_ptr)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + +}; + + +/** + * Derive the sampler static state. + */ +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler); + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + union lp_type fp_type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); + + + +#endif /* LP_BLD_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c new file mode 100644 index 0000000000..25c8d84501 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -0,0 +1,342 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->pot_width = util_is_pot(texture->width[0]); + state->pot_height = util_is_pot(texture->height[0]); + state->pot_depth = util_is_pot(texture->depth[0]); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + + +/** + * Keep all information for sampling code generation in a single place. + */ +struct lp_build_sample_context +{ + LLVMBuilderRef builder; + + const struct lp_sampler_static_state *static_state; + + struct lp_sampler_dynamic_state *dynamic_state; + + const struct util_format_description *format_desc; + + /** Incoming coordinates type and build context */ + union lp_type coord_type; + struct lp_build_context coord_bld; + + /** Integer coordinates */ + union lp_type int_coord_type; + struct lp_build_context int_coord_bld; + + /** Output texels type and build context */ + union lp_type texel_type; + struct lp_build_context texel_bld; +}; + + +static void +lp_build_sample_texel(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef x_stride; + LLVMValueRef x_offset; + LLVMValueRef y_offset; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); + + x_offset = lp_build_mul(&bld->int_coord_bld, x, x_stride); + y_offset = lp_build_mul(&bld->int_coord_bld, y, y_stride); + + offset = lp_build_add(&bld->int_coord_bld, x_offset, y_offset); + + lp_build_load_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + data_ptr, + offset, + texel); +} + + +static LLVMValueRef +lp_build_sample_wrap(struct lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if(is_pot) + coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); + else + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord = LLVMBuildURem(bld->builder, coord, length, ""); + break; + + case PIPE_TEX_WRAP_CLAMP: + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + default: + assert(0); + } + + return coord; +} + + +static void +lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef x; + LLVMValueRef y; + + x = lp_build_ifloor(&bld->coord_bld, s); + y = lp_build_ifloor(&bld->coord_bld, t); + + x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); +} + + +static void +lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef half; + LLVMValueRef s_ipart; + LLVMValueRef t_ipart; + LLVMValueRef s_fpart; + LLVMValueRef t_fpart; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2][4]; + unsigned chan; + + half = lp_build_const_scalar(bld->coord_type, 0.5); + s = lp_build_sub(&bld->coord_bld, s, half); + t = lp_build_sub(&bld->coord_bld, t, half); + + s_ipart = lp_build_floor(&bld->coord_bld, s); + t_ipart = lp_build_floor(&bld->coord_bld, t); + + s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); + t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); + + x0 = lp_build_int(&bld->coord_bld, s_ipart); + y0 = lp_build_int(&bld->coord_bld, t_ipart); + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + + /* TODO: Don't interpolate missing channels */ + for(chan = 0; chan < 4; ++chan) { + switch(bld->format_desc->swizzle[chan]) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + texel[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + break; + case UTIL_FORMAT_SWIZZLE_0: + texel[chan] = bld->texel_bld.zero; + break; + case UTIL_FORMAT_SWIZZLE_1: + texel[chan] = bld->texel_bld.one; + break; + default: + assert(0); + texel[chan] = bld->texel_bld.undef; + break; + } + } +} + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_build_sample_context bld; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef stride; + LLVMValueRef data_ptr; + LLVMValueRef s; + LLVMValueRef t; + LLVMValueRef p; + + /* Setup our build context */ + memset(&bld, 0, sizeof bld); + bld.builder = builder; + bld.static_state = static_state; + bld.dynamic_state = dynamic_state; + bld.format_desc = util_format_description(static_state->format); + bld.coord_type = type; + bld.int_coord_type = lp_int_type(type); + bld.texel_type = type; + lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); + lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); + lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); + + /* Get the dynamic state */ + width = dynamic_state->width(dynamic_state, builder, unit); + height = dynamic_state->height(dynamic_state, builder, unit); + stride = dynamic_state->stride(dynamic_state, builder, unit); + data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + + s = coords[0]; + t = coords[1]; + p = coords[2]; + + width = lp_build_broadcast_scalar(&bld.int_coord_bld, width); + height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); + stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); + + if(static_state->normalized_coords) { + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); + LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, ""); + s = lp_build_mul(&bld.coord_bld, s, fp_width); + t = lp_build_mul(&bld.coord_bld, t, fp_height); + } + + switch (static_state->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + } +} -- cgit v1.2.3 From e4c76c02f77ed6e86537b546f4200f8f8132d114 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 7 Sep 2009 14:52:39 +0100 Subject: llvmpipe: Code generate the texture sampling inside the shader. Finally a substantial performance improvement: framerates of apps using texturing tripled, and furthermore, enabling/disabling texturing only affects around 15% of the framerate, which means the bottleneck is now somewhere else. Generated texture sampling code is not complete though -- we always sample from the base level -- so final figures will be different. --- src/gallium/drivers/llvmpipe/Makefile | 3 +- src/gallium/drivers/llvmpipe/SConscript | 3 +- src/gallium/drivers/llvmpipe/lp_jit.c | 37 +- src/gallium/drivers/llvmpipe/lp_jit.h | 27 + src/gallium/drivers/llvmpipe/lp_state.h | 6 +- src/gallium/drivers/llvmpipe/lp_state_derived.c | 4 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 15 +- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 10 + src/gallium/drivers/llvmpipe/lp_tex_sample.c | 1713 --------------------- src/gallium/drivers/llvmpipe/lp_tex_sample.h | 11 + src/gallium/drivers/llvmpipe/lp_tex_sample_c.c | 1713 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c | 196 +++ 12 files changed, 2019 insertions(+), 1719 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample.c create mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c create mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index c0033dea34..06c586e6bb 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -46,7 +46,8 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample.c \ + lp_tex_sample_c.c \ + lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ lp_tile_soa.c diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index b74c9c4b6e..ac1b5d6d1d 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -54,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample.c', + 'lp_tex_sample_c.c', + 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index d288460a1b..9465f763d5 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -44,15 +44,47 @@ static void lp_jit_init_globals(struct llvmpipe_screen *screen) { - /* struct lp_jit_context */ + LLVMTypeRef texture_type; + + /* struct lp_jit_texture */ { LLVMTypeRef elem_types[4]; + + elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, + screen->target, texture_type, + LP_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, + screen->target, texture_type, + LP_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride, + screen->target, texture_type, + LP_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, + screen->target, texture_type, + LP_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, + screen->target, texture_type); + + LLVMAddTypeName(screen->module, "texture", texture_type); + } + + /* struct lp_jit_context */ + { + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, screen->target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, + screen->target, context_type, + LP_JIT_CONTEXT_TEXTURES_INDEX); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, screen->target, context_type); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index a7fb60f9f5..c3e3e1af67 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -38,11 +38,31 @@ #include "lp_bld_struct.h" +#include "pipe/p_state.h" + struct tgsi_sampler; struct llvmpipe_screen; +struct lp_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + + +enum { + LP_JIT_TEXTURE_WIDTH = 0, + LP_JIT_TEXTURE_HEIGHT, + LP_JIT_TEXTURE_STRIDE, + LP_JIT_TEXTURE_DATA +}; + + + /** * This structure is passed directly to the generated fragment shader. * @@ -65,6 +85,8 @@ struct lp_jit_context /* TODO: blend constant color */ uint8_t *blend_color; + + struct lp_jit_texture textures[PIPE_MAX_SAMPLERS]; }; @@ -80,6 +102,11 @@ struct lp_jit_context #define lp_jit_context_blend_color(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 3, "blend_color") +#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 + +#define lp_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") + typedef void (*lp_jit_frag_func)(struct lp_jit_context *context, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index fb10329887..0b846ecb13 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -36,6 +36,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */ #define LP_NEW_VIEWPORT 0x1 @@ -57,7 +58,8 @@ struct tgsi_sampler; struct vertex_info; - +struct pipe_context; +struct llvmpipe_context; struct lp_fragment_shader; @@ -67,6 +69,8 @@ struct lp_fragment_shader_variant_key struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 6fbb057937..e87976b9f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -250,7 +250,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | - LP_NEW_DEPTH_STENCIL_ALPHA)) + LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_SAMPLER | + LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 80f705c871..1a3e168245 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -486,7 +486,13 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); +#if 0 + /* C texture sampling */ sampler = lp_c_sampler_soa_create(context_ptr); +#else + /* code generated texture sampling */ + sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); +#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -666,8 +672,11 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, */ static void make_variant_key(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, struct lp_fragment_shader_variant_key *key) { + unsigned i; + memset(key, 0, sizeof *key); memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); @@ -678,6 +687,10 @@ make_variant_key(struct llvmpipe_context *lp, /* alpha.ref_value is passed in jit_context */ memcpy(&key->blend, lp->blend, sizeof key->blend); + + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) + lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]); } @@ -688,7 +701,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; - make_variant_key(lp, &key); + make_variant_key(lp, shader, &key); variant = shader->variants; while(variant) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 4fef541b1e..c69d90c723 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, pipe_texture_reference(&llvmpipe->texture[i], tex); lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex); + + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; + jit_tex->width = tex->width[0]; + jit_tex->height = tex->height[0]; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + } } llvmpipe->num_textures = num; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c deleted file mode 100644 index 9a876f404d..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ /dev/null @@ -1,1713 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture sampling - * - * Authors: - * Brian Paul - */ - -#include "lp_context.h" -#include "lp_quad.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_sample.h" -#include "lp_tex_cache.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - - -/* - * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes - * see 1-pixel bands of improperly weighted linear-filtered textures. - * The tests/texwrap.c demo is a good test. - * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. - * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). - */ -#define FRAC(f) ((f) - util_ifloor(f)) - - -/** - * Linear interpolation macro - */ -static INLINE float -lerp(float a, float v0, float v1) -{ - return v0 + a * (v1 - v0); -} - - -/** - * Do 2D/biliner interpolation of float values. - * v00, v10, v01 and v11 are typically four texture samples in a square/box. - * a and b are the horizontal and vertical interpolants. - * It's important that this function is inlined when compiled with - * optimization! If we find that's not true on some systems, convert - * to a macro. - */ -static INLINE float -lerp_2d(float a, float b, - float v00, float v10, float v01, float v11) -{ - const float temp0 = lerp(a, v00, v10); - const float temp1 = lerp(a, v01, v11); - return lerp(b, temp0, temp1); -} - - -/** - * As above, but 3D interpolation of 8 values. - */ -static INLINE float -lerp_3d(float a, float b, float c, - float v000, float v100, float v010, float v110, - float v001, float v101, float v011, float v111) -{ - const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); - const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); - return lerp(c, temp0, temp1); -} - - - -/** - * If A is a signed integer, A % B doesn't give the right value for A < 0 - * (in terms of texture repeat). Just casting to unsigned fixes that. - */ -#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) - - -/** - * Apply texture coord wrapping mode and return integer texture indexes - * for a vector of four texcoords (S or T or P). - * \param wrapMode PIPE_TEX_WRAP_x - * \param s the incoming texcoords - * \param size the texture image size - * \param icoord returns the integer texcoords - * \return integer texture index - */ -static INLINE void -nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - /* s limited to [0,1) */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); - } - return; - case PIPE_TEX_WRAP_CLAMP: - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= 0.0F) - icoord[ch] = 0; - else if (s[ch] >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] < min) - icoord[ch] = 0; - else if (s[ch] > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= min) - icoord[ch] = -1; - else if (s[ch] >= max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - const float u = fabsf(s[ch]); - if (u <= 0.0F) - icoord[ch] = 0; - else if (u >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = -1; - else if (u > max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - default: - assert(0); - } -} - - -/** - * Used to compute texel locations for linear sampling for four texcoords. - * \param wrapMode PIPE_TEX_WRAP_x - * \param s the texcoords - * \param size the texture image size - * \param icoord0 returns first texture indexes - * \param icoord1 returns second texture indexes (usually icoord0 + 1) - * \param w returns blend factor/weight between texture indexes - * \param icoord returns the computed integer texture coords - */ -static INLINE void -linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - for (ch = 0; ch < 4; ch++) { - float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], min, max); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - u = u * size - 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords - * Only a subset of wrap modes supported. - */ -static INLINE void -nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch]); - icoord[ch]= CLAMP(i, 0, (int) size-1); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); - } - return; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords. - * Only a subset of wrap modes supported. - */ -static INLINE void -linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord1[ch] > (int) size - 1) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break; - default: - assert(0); - } -} - - -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) -{ - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } - } - - *newS = ( sc / ma + 1.0F ) * 0.5F; - *newT = ( tc / ma + 1.0F ) * 0.5F; - - return face; -} - - -/** - * Examine the quad's texture coordinates to compute the partial - * derivatives w.r.t X and Y, then compute lambda (level of detail). - * - * This is only done for fragment shaders, not vertex shaders. - */ -static float -compute_lambda(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - float rho, lambda; - - if (samp->processor == TGSI_PROCESSOR_VERTEX) - return lodbias; - - assert(sampler->normalized_coords); - - assert(s); - { - float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; - float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width[0]; - } - if (t) { - float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; - float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; - float max; - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height[0]; - rho = MAX2(rho, max); - } - if (p) { - float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; - float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; - float max; - dpdx = fabsf(dpdx); - dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth[0]; - rho = MAX2(rho, max); - } - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; -} - - -/** - * Do several things here: - * 1. Compute lambda from the texcoords, if needed - * 2. Determine if we're minifying or magnifying - * 3. If minifying, choose mipmap levels - * 4. Return image filter to use within mipmap images - * \param level0 Returns first mipmap level to sample from - * \param level1 Returns second mipmap level to sample from - * \param levelBlend Returns blend factor between levels, in [0,1] - * \param imgFilter Returns either the min or mag filter, depending on lambda - */ -static void -choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - unsigned *level0, unsigned *level1, float *levelBlend, - unsigned *imgFilter) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->min_img_filter != sampler->mag_img_filter) { - /* non-mipmapped texture, but still need to determine if doing - * minification or magnification. - */ - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - if (lambda <= 0.0) { - *imgFilter = sampler->mag_img_filter; - } - else { - *imgFilter = sampler->min_img_filter; - } - } - else { - *imgFilter = sampler->mag_img_filter; - } - } - else { - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - - if (lambda <= 0.0) { /* XXX threshold depends on the filter */ - /* magnifying */ - *imgFilter = sampler->mag_img_filter; - *level0 = *level1 = 0; - } - else { - /* minifying */ - *imgFilter = sampler->min_img_filter; - - /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - /* Nearest mipmap level */ - const int lvl = (int) (lambda + 0.5); - *level0 = - *level1 = CLAMP(lvl, 0, (int) texture->last_level); - } - else { - /* Linear interpolation between mipmap levels */ - const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); - *levelBlend = FRAC(lambda); /* blending weight between levels */ - } - } - } -} - - -/** - * Get a texel from a texture, using the texture tile cache. - * - * \param face the cube face in 0..5 - * \param level the mipmap level - * \param x the x coord of texel within 2D image - * \param y the y coord of texel within 2D image - * \param z which slice of a 3D texture - * \param rgba the quad to put the texel/color into - * \param j which element of the rgba quad to write to - * - * XXX maybe move this into lp_tile_cache.c and merge with the - * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... - */ -static void -get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, - const uint8_t *out[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - out[0] = &tile->color[y ][x ][0]; - out[1] = &tile->color[y ][x+1][0]; - out[2] = &tile->color[y+1][x ][0]; - out[3] = &tile->color[y+1][x+1][0]; -} - -static INLINE const uint8_t * -get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - return &tile->color[y][x][0]; -} - - -static void -get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, - int x0, int y0, - int x1, int y1, - const uint8_t *out[4]) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - unsigned tx = (i & 1) ? x1 : x0; - unsigned ty = (i >> 1) ? y1 : y0; - - out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); - } -} - -static void -get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { - rgba[0][j] = sampler->border_color[0]; - rgba[1][j] = sampler->border_color[1]; - rgba[2][j] = sampler->border_color[2]; - rgba[3][j] = sampler->border_color[3]; - } - else { - const unsigned tx = x % TEX_TILE_SIZE; - const unsigned ty = y % TEX_TILE_SIZE; - const struct llvmpipe_cached_tex_tile *tile; - - tile = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, z, face, level)); - - rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]); - rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]); - rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]); - rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]); - if (0) - { - debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(texture->format)); - } - } -} - - -/** - * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' - * When we sampled the depth texture, the depth value was put into all - * RGBA channels. We look at the red channel here. - * \param rgba quad of (depth) texel values - * \param p texture 'P' components for four pixels in quad - * \param j which pixel in the quad to test [0..3] - */ -static INLINE void -shadow_compare(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE], - uint j) -{ - int k; - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k = p[j] < rgba[0][j]; - break; - case PIPE_FUNC_LEQUAL: - k = p[j] <= rgba[0][j]; - break; - case PIPE_FUNC_GREATER: - k = p[j] > rgba[0][j]; - break; - case PIPE_FUNC_GEQUAL: - k = p[j] >= rgba[0][j]; - break; - case PIPE_FUNC_EQUAL: - k = p[j] == rgba[0][j]; - break; - case PIPE_FUNC_NOTEQUAL: - k = p[j] != rgba[0][j]; - break; - case PIPE_FUNC_ALWAYS: - k = 1; - break; - case PIPE_FUNC_NEVER: - k = 0; - break; - default: - k = 0; - assert(0); - break; - } - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; - rgba[3][j] = 1.0F; -} - - -/** - * As above, but do four z/texture comparisons. - */ -static INLINE void -shadow_compare4(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE]) -{ - int j, k0, k1, k2, k3; - float val; - - /* compare four texcoords vs. four texture samples */ - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; - break; - case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; - break; - case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; - break; - case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; - break; - case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; - break; - case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; - break; - case PIPE_FUNC_ALWAYS: - k0 = k1 = k2 = k3 = 1; - break; - case PIPE_FUNC_NEVER: - k0 = k1 = k2 = k3 = 0; - break; - default: - k0 = k1 = k2 = k3 = 0; - assert(0); - break; - } - - /* convert four pass/fail values to an intensity in [0,1] */ - val = 0.25F * (k0 + k1 + k2 + k3); - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for (j = 0; j < 4; j++) { - rgba[0][j] = rgba[1][j] = rgba[2][j] = val; - rgba[3][j] = 1.0F; - } -} - - - -static void -lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */ - unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */ - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - float xw = u - (float)uflr; - float yw = v - (float)vflr; - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *tx[4]; - - - /* Can we fetch all four at once: - */ - if (x0 < xmax && y0 < ymax) - { - get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); - } - else - { - unsigned x1 = (x0 + 1) & (xpot - 1); - unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level, - x0, y0, x1, y1, tx); - } - - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw, yw, - ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]), - ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c])); - } - } -} - - -static void -lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int x0, y0; - const uint8_t *out; - - x0 = util_ifloor(u); - if (x0 < 0) - x0 = 0; - else if (x0 > xpot - 1) - x0 = xpot - 1; - - y0 = util_ifloor(v); - if (y0 < 0) - y0 = 0; - else if (y0 > ypot - 1) - y0 = ypot - 1; - - out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - int level0; - float lambda; - - lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - level0 = (int)lambda; - - if (lambda < 0.0) { - samp->level = 0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else if (level0 >= texture->last_level) { - samp->level = texture->last_level; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else { - float levelBlend = lambda - level0; - float rgba0[4][4]; - float rgba1[4][4]; - int c,j; - - samp->level = level0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba0 ); - - samp->level = level0+1; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba1 ); - - for (j = 0; j < QUAD_SIZE; j++) { - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); - } - } - } -} - -/** - * Common code for sampling 1D/2D/cube textures. - * Could probably extend for 3D... - */ -static void -lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const unsigned faces[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = texture->width[level0]; - height = texture->height[level0]; - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, - rgba2, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(sampler, rgba2, p, j); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - - /* XXX: This is incorrect -- will often end up with (x0 - * == x1 && y0 == y1), meaning that we fetch the same - * texel four times and linearly interpolate between - * identical values. The correct approach would be to - * call linear_texcoord again for the second level. - */ - x0[j] /= 2; - y0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static INLINE void -lp_get_samples_1d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - static const float tzero[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, tzero, NULL, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_2d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, t, p, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - /* get/map pipe_surfaces corresponding to 3D tex slices */ - unsigned level0, level1, j, imgFilter; - int width, height, depth; - float levelBlend; - const uint face = 0; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; - - assert(width > 0); - assert(height > 0); - assert(depth > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4], z[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - nearest_texcoord_4(sampler->wrap_r, p, depth, z); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - z[j] /= 2; - get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; - float xw[4], yw[4], zw[4]; /* interpolation weights */ - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - float tx0[4][4], tx1[4][4]; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - z0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - z1[j] /= 2; - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static void -lp_get_samples_cube(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - unsigned faces[QUAD_SIZE], j; - float ssss[4], tttt[4]; - for (j = 0; j < QUAD_SIZE; j++) { - faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); - } - lp_get_samples_2d_common(sampler, ssss, tttt, NULL, - lodbias, rgba, faces); -} - - -static void -lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - const uint face = 0; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - /* texture RECTS cannot be mipmapped */ - assert(level0 == level1); - - width = texture->width[level0]; - height = texture->height[level0]; - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); - nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - } - } - break; - default: - assert(0); - } -} - - -/** - * Error condition handler - */ -static INLINE void -lp_get_samples_null(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - int i,j; - - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - rgba[i][j] = 1.0; -} - -/** - * Called via tgsi_sampler::get_samples() when using a sampler for the - * first time. Determine the actual sampler function, link it in and - * call it. - */ -void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - /* Default to the 'undefined' case: - */ - tgsi_sampler->get_samples = lp_get_samples_null; - - if (!texture) { - assert(0); /* is this legal?? */ - goto out; - } - - if (!sampler->normalized_coords) { - assert (texture->target == PIPE_TEXTURE_2D); - tgsi_sampler->get_samples = lp_get_samples_rect; - goto out; - } - - switch (texture->target) { - case PIPE_TEXTURE_1D: - tgsi_sampler->get_samples = lp_get_samples_1d; - break; - case PIPE_TEXTURE_2D: - tgsi_sampler->get_samples = lp_get_samples_2d; - break; - case PIPE_TEXTURE_3D: - tgsi_sampler->get_samples = lp_get_samples_3d; - break; - case PIPE_TEXTURE_CUBE: - tgsi_sampler->get_samples = lp_get_samples_cube; - break; - default: - assert(0); - break; - } - - /* Do this elsewhere: - */ - samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); - samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); - - /* Try to hook in a faster sampler. Ultimately we'll have to - * code-generate these. Luckily most of this looks like it is - * orthogonal state within the sampler. - */ - if (texture->target == PIPE_TEXTURE_2D && - sampler->min_img_filter == sampler->mag_img_filter && - sampler->wrap_s == sampler->wrap_t && - sampler->compare_mode == FALSE && - sampler->normalized_coords) - { - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - samp->level = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT; - break; - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT; - break; - default: - break; - } - } - else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT; - break; - default: - break; - } - } - } - else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT; - break; - default: - break; - } - } - } - } - else if (0) { - _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", - texture->target, PIPE_TEXTURE_2D, - sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, - sampler->min_img_filter, sampler->mag_img_filter, - sampler->wrap_s, sampler->wrap_t, - sampler->compare_mode, FALSE, - sampler->normalized_coords, TRUE); - } - -out: - tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); -} - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -#include "lp_bld_type.h" -#include "lp_bld_intr.h" -#include "lp_bld_tgsi.h" - - -struct lp_c_sampler_soa -{ - struct lp_build_sampler_soa base; - - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -static void -lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) -{ - FREE(sampler); -} - - -static void -lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, - LLVMBuilderRef builder, - union lp_type type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!sampler->samplers_ptr) - sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); - - if(!sampler->store_ptr) - sampler->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = sampler->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = sampler->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr) -{ - struct lp_c_sampler_soa *sampler; - - sampler = CALLOC_STRUCT(lp_c_sampler_soa); - if(!sampler) - return NULL; - - sampler->base.destroy = lp_c_sampler_soa_destroy; - sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; - sampler->context_ptr = context_ptr; - - return &sampler->base; -} - diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 7d1e565885..9ad1bde956 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -35,6 +35,7 @@ struct llvmpipe_tex_tile_cache; +struct lp_sampler_static_state; /** @@ -87,4 +88,14 @@ struct lp_build_sampler_soa * lp_c_sampler_soa_create(LLVMValueRef context_ptr); +/** + * Pure-LLVM texture sampling code generator. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key, + LLVMValueRef context_ptr); + + #endif /* LP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c new file mode 100644 index 0000000000..9a876f404d --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -0,0 +1,1713 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling + * + * Authors: + * Brian Paul + */ + +#include "lp_context.h" +#include "lp_quad.h" +#include "lp_surface.h" +#include "lp_texture.h" +#include "lp_tex_sample.h" +#include "lp_tex_cache.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + + +/* + * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes + * see 1-pixel bands of improperly weighted linear-filtered textures. + * The tests/texwrap.c demo is a good test. + * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. + * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). + */ +#define FRAC(f) ((f) - util_ifloor(f)) + + +/** + * Linear interpolation macro + */ +static INLINE float +lerp(float a, float v0, float v1) +{ + return v0 + a * (v1 - v0); +} + + +/** + * Do 2D/biliner interpolation of float values. + * v00, v10, v01 and v11 are typically four texture samples in a square/box. + * a and b are the horizontal and vertical interpolants. + * It's important that this function is inlined when compiled with + * optimization! If we find that's not true on some systems, convert + * to a macro. + */ +static INLINE float +lerp_2d(float a, float b, + float v00, float v10, float v01, float v11) +{ + const float temp0 = lerp(a, v00, v10); + const float temp1 = lerp(a, v01, v11); + return lerp(b, temp0, temp1); +} + + +/** + * As above, but 3D interpolation of 8 values. + */ +static INLINE float +lerp_3d(float a, float b, float c, + float v000, float v100, float v010, float v110, + float v001, float v101, float v011, float v111) +{ + const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); + const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); + return lerp(c, temp0, temp1); +} + + + +/** + * If A is a signed integer, A % B doesn't give the right value for A < 0 + * (in terms of texture repeat). Just casting to unsigned fixes that. + */ +#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) + + +/** + * Apply texture coord wrapping mode and return integer texture indexes + * for a vector of four texcoords (S or T or P). + * \param wrapMode PIPE_TEX_WRAP_x + * \param s the incoming texcoords + * \param size the texture image size + * \param icoord returns the integer texcoords + * \return integer texture index + */ +static INLINE void +nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + /* s limited to [0,1) */ + /* i limited to [0,size-1] */ + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch] * size); + icoord[ch] = REMAINDER(i, size); + } + return; + case PIPE_TEX_WRAP_CLAMP: + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= 0.0F) + icoord[ch] = 0; + else if (s[ch] >= 1.0F) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } + return; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + if (s[ch] < min) + icoord[ch] = 0; + else if (s[ch] > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } + } + return; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [-1, size] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= min) + icoord[ch] = -1; + else if (s[ch] >= max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(s[ch] * size); + } + } + return; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + { + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } + } + return; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + for (ch = 0; ch < 4; ch++) { + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + const float u = fabsf(s[ch]); + if (u <= 0.0F) + icoord[ch] = 0; + else if (u >= 1.0F) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } + return; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } + } + return; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = -1; + else if (u > max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(u * size); + } + } + return; + default: + assert(0); + } +} + + +/** + * Used to compute texel locations for linear sampling for four texcoords. + * \param wrapMode PIPE_TEX_WRAP_x + * \param s the texcoords + * \param size the texture image size + * \param icoord0 returns first texture indexes + * \param icoord1 returns second texture indexes (usually icoord0 + 1) + * \param w returns blend factor/weight between texture indexes + * \param icoord returns the computed integer texture coords + */ +static INLINE void +linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + for (ch = 0; ch < 4; ch++) { + float u = s[ch] * size - 0.5F; + icoord0[ch] = REMAINDER(util_ifloor(u), size); + icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_CLAMP: + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], min, max); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + } + break;; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + u = u * size - 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u <= min) + u = min * size; + else if (u >= max) + u = max * size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + } + break;; + default: + assert(0); + } +} + + +/** + * For RECT textures / unnormalized texcoords + * Only a subset of wrap modes supported. + */ +static INLINE void +nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch]); + icoord[ch]= CLAMP(i, 0, (int) size-1); + } + return; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + for (ch = 0; ch < 4; ch++) { + icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); + } + return; + default: + assert(0); + } +} + + +/** + * For RECT textures / unnormalized texcoords. + * Only a subset of wrap modes supported. + */ +static INLINE void +linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + for (ch = 0; ch < 4; ch++) { + /* Not exactly what the spec says, but it matches NVIDIA output */ + float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + return; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord1[ch] > (int) size - 1) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break; + default: + assert(0); + } +} + + +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + *newS = ( sc / ma + 1.0F ) * 0.5F; + *newT = ( tc / ma + 1.0F ) * 0.5F; + + return face; +} + + +/** + * Examine the quad's texture coordinates to compute the partial + * derivatives w.r.t X and Y, then compute lambda (level of detail). + * + * This is only done for fragment shaders, not vertex shaders. + */ +static float +compute_lambda(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + float rho, lambda; + + if (samp->processor == TGSI_PROCESSOR_VERTEX) + return lodbias; + + assert(sampler->normalized_coords); + + assert(s); + { + float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; + float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; + dsdx = fabsf(dsdx); + dsdy = fabsf(dsdy); + rho = MAX2(dsdx, dsdy) * texture->width[0]; + } + if (t) { + float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; + float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; + float max; + dtdx = fabsf(dtdx); + dtdy = fabsf(dtdy); + max = MAX2(dtdx, dtdy) * texture->height[0]; + rho = MAX2(rho, max); + } + if (p) { + float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; + float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; + float max; + dpdx = fabsf(dpdx); + dpdy = fabsf(dpdy); + max = MAX2(dpdx, dpdy) * texture->depth[0]; + rho = MAX2(rho, max); + } + + lambda = util_fast_log2(rho); + lambda += lodbias + sampler->lod_bias; + lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); + + return lambda; +} + + +/** + * Do several things here: + * 1. Compute lambda from the texcoords, if needed + * 2. Determine if we're minifying or magnifying + * 3. If minifying, choose mipmap levels + * 4. Return image filter to use within mipmap images + * \param level0 Returns first mipmap level to sample from + * \param level1 Returns second mipmap level to sample from + * \param levelBlend Returns blend factor between levels, in [0,1] + * \param imgFilter Returns either the min or mag filter, depending on lambda + */ +static void +choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + unsigned *level0, unsigned *level1, float *levelBlend, + unsigned *imgFilter) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* no mipmap selection needed */ + *level0 = *level1 = CLAMP((int) sampler->min_lod, + 0, (int) texture->last_level); + + if (sampler->min_img_filter != sampler->mag_img_filter) { + /* non-mipmapped texture, but still need to determine if doing + * minification or magnification. + */ + float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); + if (lambda <= 0.0) { + *imgFilter = sampler->mag_img_filter; + } + else { + *imgFilter = sampler->min_img_filter; + } + } + else { + *imgFilter = sampler->mag_img_filter; + } + } + else { + float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); + + if (lambda <= 0.0) { /* XXX threshold depends on the filter */ + /* magnifying */ + *imgFilter = sampler->mag_img_filter; + *level0 = *level1 = 0; + } + else { + /* minifying */ + *imgFilter = sampler->min_img_filter; + + /* choose mipmap level(s) and compute the blend factor between them */ + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + /* Nearest mipmap level */ + const int lvl = (int) (lambda + 0.5); + *level0 = + *level1 = CLAMP(lvl, 0, (int) texture->last_level); + } + else { + /* Linear interpolation between mipmap levels */ + const int lvl = (int) lambda; + *level0 = CLAMP(lvl, 0, (int) texture->last_level); + *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); + *levelBlend = FRAC(lambda); /* blending weight between levels */ + } + } + } +} + + +/** + * Get a texel from a texture, using the texture tile cache. + * + * \param face the cube face in 0..5 + * \param level the mipmap level + * \param x the x coord of texel within 2D image + * \param y the y coord of texel within 2D image + * \param z which slice of a 3D texture + * \param rgba the quad to put the texel/color into + * \param j which element of the rgba quad to write to + * + * XXX maybe move this into lp_tile_cache.c and merge with the + * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... + */ +static void +get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, + unsigned face, unsigned level, int x, int y, + const uint8_t *out[4]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + + const struct llvmpipe_cached_tex_tile *tile + = lp_get_cached_tex_tile(samp->cache, + tex_tile_address(x, y, 0, face, level)); + + y %= TEX_TILE_SIZE; + x %= TEX_TILE_SIZE; + + out[0] = &tile->color[y ][x ][0]; + out[1] = &tile->color[y ][x+1][0]; + out[2] = &tile->color[y+1][x ][0]; + out[3] = &tile->color[y+1][x+1][0]; +} + +static INLINE const uint8_t * +get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, + unsigned face, unsigned level, int x, int y) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + + const struct llvmpipe_cached_tex_tile *tile + = lp_get_cached_tex_tile(samp->cache, + tex_tile_address(x, y, 0, face, level)); + + y %= TEX_TILE_SIZE; + x %= TEX_TILE_SIZE; + + return &tile->color[y][x][0]; +} + + +static void +get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, + unsigned face, unsigned level, + int x0, int y0, + int x1, int y1, + const uint8_t *out[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + unsigned tx = (i & 1) ? x1 : x0; + unsigned ty = (i >> 1) ? y1 : y0; + + out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); + } +} + +static void +get_texel(const struct tgsi_sampler *tgsi_sampler, + unsigned face, unsigned level, int x, int y, int z, + float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + + if (x < 0 || x >= (int) texture->width[level] || + y < 0 || y >= (int) texture->height[level] || + z < 0 || z >= (int) texture->depth[level]) { + rgba[0][j] = sampler->border_color[0]; + rgba[1][j] = sampler->border_color[1]; + rgba[2][j] = sampler->border_color[2]; + rgba[3][j] = sampler->border_color[3]; + } + else { + const unsigned tx = x % TEX_TILE_SIZE; + const unsigned ty = y % TEX_TILE_SIZE; + const struct llvmpipe_cached_tex_tile *tile; + + tile = lp_get_cached_tex_tile(samp->cache, + tex_tile_address(x, y, z, face, level)); + + rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]); + rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]); + rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]); + rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]); + if (0) + { + debug_printf("Get texel %f %f %f %f from %s\n", + rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], + pf_name(texture->format)); + } + } +} + + +/** + * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' + * When we sampled the depth texture, the depth value was put into all + * RGBA channels. We look at the red channel here. + * \param rgba quad of (depth) texel values + * \param p texture 'P' components for four pixels in quad + * \param j which pixel in the quad to test [0..3] + */ +static INLINE void +shadow_compare(const struct pipe_sampler_state *sampler, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const float p[QUAD_SIZE], + uint j) +{ + int k; + switch (sampler->compare_func) { + case PIPE_FUNC_LESS: + k = p[j] < rgba[0][j]; + break; + case PIPE_FUNC_LEQUAL: + k = p[j] <= rgba[0][j]; + break; + case PIPE_FUNC_GREATER: + k = p[j] > rgba[0][j]; + break; + case PIPE_FUNC_GEQUAL: + k = p[j] >= rgba[0][j]; + break; + case PIPE_FUNC_EQUAL: + k = p[j] == rgba[0][j]; + break; + case PIPE_FUNC_NOTEQUAL: + k = p[j] != rgba[0][j]; + break; + case PIPE_FUNC_ALWAYS: + k = 1; + break; + case PIPE_FUNC_NEVER: + k = 0; + break; + default: + k = 0; + assert(0); + break; + } + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; + rgba[3][j] = 1.0F; +} + + +/** + * As above, but do four z/texture comparisons. + */ +static INLINE void +shadow_compare4(const struct pipe_sampler_state *sampler, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const float p[QUAD_SIZE]) +{ + int j, k0, k1, k2, k3; + float val; + + /* compare four texcoords vs. four texture samples */ + switch (sampler->compare_func) { + case PIPE_FUNC_LESS: + k0 = p[0] < rgba[0][0]; + k1 = p[1] < rgba[0][1]; + k2 = p[2] < rgba[0][2]; + k3 = p[3] < rgba[0][3]; + break; + case PIPE_FUNC_LEQUAL: + k0 = p[0] <= rgba[0][0]; + k1 = p[1] <= rgba[0][1]; + k2 = p[2] <= rgba[0][2]; + k3 = p[3] <= rgba[0][3]; + break; + case PIPE_FUNC_GREATER: + k0 = p[0] > rgba[0][0]; + k1 = p[1] > rgba[0][1]; + k2 = p[2] > rgba[0][2]; + k3 = p[3] > rgba[0][3]; + break; + case PIPE_FUNC_GEQUAL: + k0 = p[0] >= rgba[0][0]; + k1 = p[1] >= rgba[0][1]; + k2 = p[2] >= rgba[0][2]; + k3 = p[3] >= rgba[0][3]; + break; + case PIPE_FUNC_EQUAL: + k0 = p[0] == rgba[0][0]; + k1 = p[1] == rgba[0][1]; + k2 = p[2] == rgba[0][2]; + k3 = p[3] == rgba[0][3]; + break; + case PIPE_FUNC_NOTEQUAL: + k0 = p[0] != rgba[0][0]; + k1 = p[1] != rgba[0][1]; + k2 = p[2] != rgba[0][2]; + k3 = p[3] != rgba[0][3]; + break; + case PIPE_FUNC_ALWAYS: + k0 = k1 = k2 = k3 = 1; + break; + case PIPE_FUNC_NEVER: + k0 = k1 = k2 = k3 = 0; + break; + default: + k0 = k1 = k2 = k3 = 0; + assert(0); + break; + } + + /* convert four pass/fail values to an intensity in [0,1] */ + val = 0.25F * (k0 + k1 + k2 + k3); + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + for (j = 0; j < 4; j++) { + rgba[0][j] = rgba[1][j] = rgba[2][j] = val; + rgba[3][j] = 1.0F; + } +} + + + +static void +lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + unsigned j; + unsigned level = samp->level; + unsigned xpot = 1 << (samp->xpot - level); + unsigned ypot = 1 << (samp->ypot - level); + unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */ + unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */ + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot - 0.5F; + float v = t[j] * ypot - 0.5F; + + int uflr = util_ifloor(u); + int vflr = util_ifloor(v); + + float xw = u - (float)uflr; + float yw = v - (float)vflr; + + int x0 = uflr & (xpot - 1); + int y0 = vflr & (ypot - 1); + + const uint8_t *tx[4]; + + + /* Can we fetch all four at once: + */ + if (x0 < xmax && y0 < ymax) + { + get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); + } + else + { + unsigned x1 = (x0 + 1) & (xpot - 1); + unsigned y1 = (y0 + 1) & (ypot - 1); + get_texel_quad_2d_mt(tgsi_sampler, 0, level, + x0, y0, x1, y1, tx); + } + + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw, yw, + ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]), + ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c])); + } + } +} + + +static void +lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + unsigned j; + unsigned level = samp->level; + unsigned xpot = 1 << (samp->xpot - level); + unsigned ypot = 1 << (samp->ypot - level); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot; + float v = t[j] * ypot; + + int uflr = util_ifloor(u); + int vflr = util_ifloor(v); + + int x0 = uflr & (xpot - 1); + int y0 = vflr & (ypot - 1); + + const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); + + for (c = 0; c < 4; c++) { + rgba[c][j] = ubyte_to_float(out[c]); + } + } +} + + +static void +lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + unsigned j; + unsigned level = samp->level; + unsigned xpot = 1 << (samp->xpot - level); + unsigned ypot = 1 << (samp->ypot - level); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot; + float v = t[j] * ypot; + + int x0, y0; + const uint8_t *out; + + x0 = util_ifloor(u); + if (x0 < 0) + x0 = 0; + else if (x0 > xpot - 1) + x0 = xpot - 1; + + y0 = util_ifloor(v); + if (y0 < 0) + y0 = 0; + else if (y0 > ypot - 1) + y0 = ypot - 1; + + out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); + + for (c = 0; c < 4; c++) { + rgba[c][j] = ubyte_to_float(out[c]); + } + } +} + + +static void +lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + int level0; + float lambda; + + lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); + level0 = (int)lambda; + + if (lambda < 0.0) { + samp->level = 0; + lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, 0, rgba ); + } + else if (level0 >= texture->last_level) { + samp->level = texture->last_level; + lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, 0, rgba ); + } + else { + float levelBlend = lambda - level0; + float rgba0[4][4]; + float rgba1[4][4]; + int c,j; + + samp->level = level0; + lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, 0, rgba0 ); + + samp->level = level0+1; + lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, 0, rgba1 ); + + for (j = 0; j < QUAD_SIZE; j++) { + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); + } + } + } +} + +/** + * Common code for sampling 1D/2D/cube textures. + * Could probably extend for 3D... + */ +static void +lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const unsigned faces[4]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(tgsi_sampler, s, t, p, + lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + assert(sampler->normalized_coords); + + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + { + int x[4], y[4]; + nearest_texcoord_4(sampler->wrap_s, s, width, x); + nearest_texcoord_4(sampler->wrap_t, t, height, y); + + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(sampler, rgba, p, j); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x[j] /= 2; + y[j] /= 2; + get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, + rgba2, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(sampler, rgba2, p, j); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + { + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + + linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); + + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare4(sampler, tx, p); + } + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], + tx[c][2], tx[c][3]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + + /* XXX: This is incorrect -- will often end up with (x0 + * == x1 && y0 == y1), meaning that we fetch the same + * texel four times and linearly interpolate between + * identical values. The correct approach would be to + * call linear_texcoord again for the second level. + */ + x0[j] /= 2; + y0[j] /= 2; + x1[j] /= 2; + y1[j] /= 2; + get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare4(sampler, tx, p); + } + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + } + break; + default: + assert(0); + } +} + + +static INLINE void +lp_get_samples_1d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + static const float tzero[4] = {0, 0, 0, 0}; + lp_get_samples_2d_common(sampler, s, tzero, NULL, + lodbias, rgba, faces); +} + + +static INLINE void +lp_get_samples_2d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + lp_get_samples_2d_common(sampler, s, t, p, + lodbias, rgba, faces); +} + + +static INLINE void +lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + /* get/map pipe_surfaces corresponding to 3D tex slices */ + unsigned level0, level1, j, imgFilter; + int width, height, depth; + float levelBlend; + const uint face = 0; + + choose_mipmap_levels(tgsi_sampler, s, t, p, + lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + assert(sampler->normalized_coords); + + width = texture->width[level0]; + height = texture->height[level0]; + depth = texture->depth[level0]; + + assert(width > 0); + assert(height > 0); + assert(depth > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + { + int x[4], y[4], z[4]; + nearest_texcoord_4(sampler->wrap_s, s, width, x); + nearest_texcoord_4(sampler->wrap_t, t, height, y); + nearest_texcoord_4(sampler->wrap_r, p, depth, z); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x[j] /= 2; + y[j] /= 2; + z[j] /= 2; + get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); + } + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + { + int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; + float xw[4], yw[4], zw[4]; /* interpolation weights */ + linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); + linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + float tx0[4][4], tx1[4][4]; + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx0[c][0], tx0[c][1], + tx0[c][2], tx0[c][3], + tx1[c][0], tx1[c][1], + tx1[c][2], tx1[c][3]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0[j] /= 2; + y0[j] /= 2; + z0[j] /= 2; + x1[j] /= 2; + y1[j] /= 2; + z1[j] /= 2; + get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); + get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); + get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); + get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); + get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); + get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); + get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); + get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx0[c][0], tx0[c][1], + tx0[c][2], tx0[c][3], + tx1[c][0], tx1[c][1], + tx1[c][2], tx1[c][3]); + } + + /* blend mipmap levels */ + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + } + break; + default: + assert(0); + } +} + + +static void +lp_get_samples_cube(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + unsigned faces[QUAD_SIZE], j; + float ssss[4], tttt[4]; + for (j = 0; j < QUAD_SIZE; j++) { + faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); + } + lp_get_samples_2d_common(sampler, ssss, tttt, NULL, + lodbias, rgba, faces); +} + + +static void +lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + const uint face = 0; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(tgsi_sampler, s, t, p, + lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + /* texture RECTS cannot be mipmapped */ + assert(level0 == level1); + + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + { + int x[4], y[4]; + nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); + nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(sampler, rgba, p, j); + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + { + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare4(sampler, tx, p); + } + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + } + } + break; + default: + assert(0); + } +} + + +/** + * Error condition handler + */ +static INLINE void +lp_get_samples_null(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + int i,j; + + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + rgba[i][j] = 1.0; +} + +/** + * Called via tgsi_sampler::get_samples() when using a sampler for the + * first time. Determine the actual sampler function, link it in and + * call it. + */ +void +lp_get_samples(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + + /* Default to the 'undefined' case: + */ + tgsi_sampler->get_samples = lp_get_samples_null; + + if (!texture) { + assert(0); /* is this legal?? */ + goto out; + } + + if (!sampler->normalized_coords) { + assert (texture->target == PIPE_TEXTURE_2D); + tgsi_sampler->get_samples = lp_get_samples_rect; + goto out; + } + + switch (texture->target) { + case PIPE_TEXTURE_1D: + tgsi_sampler->get_samples = lp_get_samples_1d; + break; + case PIPE_TEXTURE_2D: + tgsi_sampler->get_samples = lp_get_samples_2d; + break; + case PIPE_TEXTURE_3D: + tgsi_sampler->get_samples = lp_get_samples_3d; + break; + case PIPE_TEXTURE_CUBE: + tgsi_sampler->get_samples = lp_get_samples_cube; + break; + default: + assert(0); + break; + } + + /* Do this elsewhere: + */ + samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); + samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); + + /* Try to hook in a faster sampler. Ultimately we'll have to + * code-generate these. Luckily most of this looks like it is + * orthogonal state within the sampler. + */ + if (texture->target == PIPE_TEXTURE_2D && + sampler->min_img_filter == sampler->mag_img_filter && + sampler->wrap_s == sampler->wrap_t && + sampler->compare_mode == FALSE && + sampler->normalized_coords) + { + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + samp->level = CLAMP((int) sampler->min_lod, + 0, (int) texture->last_level); + + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { + switch (sampler->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT; + break; + case PIPE_TEX_FILTER_LINEAR: + tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT; + break; + default: + break; + } + } + else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { + switch (sampler->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT; + break; + default: + break; + } + } + } + else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { + switch (sampler->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT; + break; + default: + break; + } + } + } + } + else if (0) { + _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", + texture->target, PIPE_TEXTURE_2D, + sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, + sampler->min_img_filter, sampler->mag_img_filter, + sampler->wrap_s, sampler->wrap_t, + sampler->compare_mode, FALSE, + sampler->normalized_coords, TRUE); + } + +out: + tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); +} + + +void PIPE_CDECL +lp_fetch_texel_soa( struct tgsi_sampler **samplers, + uint32_t unit, + float *store ) +{ + struct tgsi_sampler *sampler = samplers[unit]; + +#if 0 + uint j; + + debug_printf("%s sampler: %p (%p) store: %p\n", + __FUNCTION__, + sampler, *sampler, + store ); + + debug_printf("lodbias %f\n", store[12]); + + for (j = 0; j < 4; j++) + debug_printf("sample %d texcoord %f %f\n", + j, + store[0+j], + store[4+j]); +#endif + + { + float rgba[NUM_CHANNELS][QUAD_SIZE]; + sampler->get_samples(sampler, + &store[0], + &store[4], + &store[8], + 0.0f, /*store[12], lodbias */ + rgba); + memcpy(store, rgba, sizeof rgba); + } + +#if 0 + for (j = 0; j < 4; j++) + debug_printf("sample %d result %f %f %f %f\n", + j, + store[0+j], + store[4+j], + store[8+j], + store[12+j]); +#endif +} + + +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_tgsi.h" + + +struct lp_c_sampler_soa +{ + struct lp_build_sampler_soa base; + + LLVMValueRef context_ptr; + + LLVMValueRef samplers_ptr; + + /** Coords/texels store */ + LLVMValueRef store_ptr; +}; + + +static void +lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; + LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); + LLVMValueRef args[3]; + unsigned i; + + if(!sampler->samplers_ptr) + sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); + + if(!sampler->store_ptr) + sampler->store_ptr = LLVMBuildArrayAlloca(builder, + vec_type, + LLVMConstInt(LLVMInt32Type(), 4, 0), + "texel_store"); + + for (i = 0; i < num_coords; i++) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + LLVMBuildStore(builder, coords[i], coord_ptr); + } + + args[0] = sampler->samplers_ptr; + args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); + args[2] = sampler->store_ptr; + + lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); + } +} + + +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr) +{ + struct lp_c_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_c_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_c_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; + sampler->context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c new file mode 100644 index 0000000000..7d31705d01 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling code generation + * + * This file is nothing more than ugly glue between three largely independent + * entities: + * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa) + * - texture sampling code generation (i.e., lp_build_sample_soa) + * - LLVM pipe driver + * + * All interesting code is in the functions mentioned above. There is really + * nothing to see here. + * + * @author Jose Fonseca + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_sample.h" +#include "lp_bld_tgsi.h" +#include "lp_state.h" +#include "lp_tex_sample.h" + + +/** + * This provides the bridge between the sampler state store in lp_jit_context + * and lp_jit_texture and the sampler code generator. It provides the + * texture layout information required by the texture sampler code generator + * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime. + */ +struct llvmpipe_sampler_dynamic_state +{ + struct lp_sampler_dynamic_state base; + + const struct lp_sampler_static_state *static_state; + + LLVMValueRef context_ptr; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct lp_llvm_sampler_soa +{ + struct lp_build_sampler_soa base; + + struct llvmpipe_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, + LLVMBuilderRef builder, + unsigned unit, + unsigned member_index, + const char *member_name) +{ + struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(unit < PIPE_MAX_SAMPLERS); + + /* context[0] */ + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + /* context[0].textures */ + indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0); + /* context[0].textures[unit] */ + indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + /* context[0].textures[unit].member */ + indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + + ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); + + res = LLVMBuildLoad(builder, ptr, ""); + + lp_build_name(res, "context.texture%u.%s", unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to fetch + * the members of lp_jit_texture to fulfill the sampler code generator requests. + * + * This complexity is the price we have to pay to keep the texture sampler code + * generator a reusable module without dependencies to llvmpipe internals. + */ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \ + static LLVMValueRef \ + lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ + LLVMBuilderRef builder, \ + unsigned unit) \ + { \ + return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \ + } + + +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT) +LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE) +LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) + + +static void +lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base; + + assert(unit < PIPE_MAX_SAMPLERS); + + lp_build_sample_soa(builder, + &sampler->dynamic_state.static_state[unit], + &sampler->dynamic_state.base, + type, + unit, + num_coords, + coords, + lodbias, + texel); +} + + +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr) +{ + struct lp_llvm_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_llvm_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_llvm_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel; + sampler->dynamic_state.base.width = lp_llvm_texture_width; + sampler->dynamic_state.base.height = lp_llvm_texture_height; + sampler->dynamic_state.base.stride = lp_llvm_texture_stride; + sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; + sampler->dynamic_state.static_state = static_state; + sampler->dynamic_state.context_ptr = context_ptr; + + return &sampler->base; +} + -- cgit v1.2.3 From 11272010887ce26b0f4162dd311376798c1d3fc3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 7 Sep 2009 14:53:15 +0100 Subject: llvmpipe: Better abs for floating points. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index a1d8a89774..be7442d00a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -591,20 +591,31 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); if(!type.sign) return a; - /* XXX: is this really necessary? */ + if(type.floating) { + /* Mask out the sign bit */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } + #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(!type.floating && type.width*type.length == 128) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - if(type.width == 8) + if(type.width*type.length == 128) { + switch(type.width) { + case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); - if(type.width == 16) + case 16: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); - if(type.width == 32) + case 32: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); + } } #endif -- cgit v1.2.3 From b481fb2c6d8a8def0956acb0bf9083f5441edd07 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 7 Sep 2009 14:53:26 +0100 Subject: llvmpipe: Silent debug statement. --- src/gallium/drivers/llvmpipe/lp_tex_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 23a94b5b0d..773e848242 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) if (lpt->timestamp != tc->timestamp) { /* texture was modified, invalidate all cached tiles */ uint i; - _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); + debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); for (i = 0; i < NUM_ENTRIES; i++) { tc->entries[i].addr.bits.invalid = 1; } -- cgit v1.2.3 From 79f48c9f9e739a1f6b0810072e41bc826f2b789d Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 7 Sep 2009 15:16:25 +0100 Subject: scons: Don't set LLVM_VERSION if one of the llvm-config calls fails. Ubuntu 8.10 has llvm-config version 2.2, which doesn't have nativecodegen. This triggers an exception. --- scons/llvm.py | 18 ++++++++++-------- src/gallium/drivers/llvmpipe/SConscript | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/scons/llvm.py b/scons/llvm.py index 702f1e354f..46a8d829ca 100644 --- a/scons/llvm.py +++ b/scons/llvm.py @@ -56,15 +56,17 @@ def generate(env): env.PrependENVPath('PATH', llvm_bin_dir) if env.Detect('llvm-config'): - try: - env['LLVM_VERSION'] = env.backtick('llvm-config --version') - except AttributeError: - env['LLVM_VERSION'] = 'X.X' + version = env.backtick('llvm-config --version').rstrip() - env.ParseConfig('llvm-config --cppflags') - env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter') - env.ParseConfig('llvm-config --ldflags') - env['LINK'] = env['CXX'] + try: + env.ParseConfig('llvm-config --cppflags') + env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter') + env.ParseConfig('llvm-config --ldflags') + except OSError: + print 'llvm-config version %s failed' % version + else: + env['LINK'] = env['CXX'] + env['LLVM_VERSION'] = version def exists(env): return True diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index ac1b5d6d1d..dea4b703c4 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -3,7 +3,7 @@ Import('*') env = env.Clone() env.Tool('llvm') -if 'LLVM_VERSION' not in env: +if env.has_key('LLVM_VERSION') is False: print 'warning: LLVM not found: not building llvmpipe' Return() -- cgit v1.2.3 From 01c831576eb77ec87bff667ed5591a93cbbef97d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 9 Sep 2009 19:21:22 +0100 Subject: llvmpipe: Debug function to check stack alignment. Doing alignment check in locus is redundant, as gcc alignment assumptions will optimize away the check. --- src/gallium/drivers/llvmpipe/lp_bld_debug.c | 17 +++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_debug.h | 4 ++++ src/gallium/drivers/llvmpipe/lp_setup.c | 11 +++++++---- 3 files changed, 28 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 30925b5f41..59d8f492e6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -30,10 +30,27 @@ #include #endif +#include "util/u_math.h" #include "util/u_debug.h" #include "lp_bld_debug.h" +/** + * Check alignment. + * + * It is important that this check is not implemented as a macro or inlined + * function, as the compiler assumptions in respect to alignment of global + * and stack variables would often make the check a no op, defeating the + * whole purpose of the exercise. + */ +boolean +lp_check_alignment(const void *ptr, unsigned alignment) +{ + assert(util_is_pot(alignment)); + return ((uintptr_t)ptr & (alignment - 1)) == 0; +} + + void lp_disassemble(const void* func) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h index ecdafef76d..583e6132b4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h @@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...) } +boolean +lp_check_alignment(const void *ptr, unsigned alignment); + + void lp_disassemble(const void* func); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d145f6d6bb..f9e254efca 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -44,6 +44,7 @@ #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "lp_bld_debug.h" #include "lp_tile_cache.h" #include "lp_tile_soa.h" @@ -164,10 +165,12 @@ shade_quads(struct llvmpipe_context *llvmpipe, /* TODO: blend color */ - assert((((uintptr_t)mask) & 0xf) == 0); - assert((((uintptr_t)depth) & 0xf) == 0); - assert((((uintptr_t)color) & 0xf) == 0); - assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0); + /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ + assert(lp_check_alignment(mask, 16)); + + assert(lp_check_alignment(depth, 16)); + assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); /* run shader */ fs->current->jit_function( &llvmpipe->jit_context, -- cgit v1.2.3 From cdbbcdf3bdb114d79cf7b9474436c3d26b135592 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 9 Sep 2009 21:16:06 +0100 Subject: llvmpipe: Include zsbuf's format in the fragment shader key. --- src/gallium/drivers/llvmpipe/lp_state.h | 1 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 0b846ecb13..7b26ce61a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -66,6 +66,7 @@ struct lp_fragment_shader; struct lp_fragment_shader_variant_key { + enum pipe_format zsbuf_format; struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 1a3e168245..21e675c34e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -131,9 +131,8 @@ generate_pos0(LLVMBuilderRef builder, * Generate the depth test. */ static void -generate_depth(struct llvmpipe_context *lp, - LLVMBuilderRef builder, - const struct pipe_depth_state *state, +generate_depth(LLVMBuilderRef builder, + const struct lp_fragment_shader_variant_key *key, union lp_type src_type, struct lp_build_mask_context *mask, LLVMValueRef src, @@ -142,10 +141,10 @@ generate_depth(struct llvmpipe_context *lp, const struct util_format_description *format_desc; union lp_type dst_type; - if(!lp->framebuffer.zsbuf) + if(!key->depth.enabled) return; - format_desc = util_format_description(lp->framebuffer.zsbuf->format); + format_desc = util_format_description(key->zsbuf_format); assert(format_desc); /* Pick the depth type. */ @@ -165,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp, #endif lp_build_depth_test(builder, - state, + &key->depth, dst_type, format_desc, mask, @@ -212,14 +211,13 @@ generate_fs(struct llvmpipe_context *lp, lp_build_mask_begin(&mask, builder, type, *pmask); early_depth_test = - lp->depth_stencil->depth.enabled && - lp->framebuffer.zsbuf && - !lp->depth_stencil->alpha.enabled && - !lp->fs->info.uses_kill && - !lp->fs->info.writes_z; + key->depth.enabled && + !key->alpha.enabled && + !shader->info.uses_kill && + !shader->info.writes_z; if(early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); @@ -268,7 +266,7 @@ generate_fs(struct llvmpipe_context *lp, } if(!early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); @@ -679,7 +677,11 @@ make_variant_key(struct llvmpipe_context *lp, memset(key, 0, sizeof *key); - memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + if(lp->framebuffer.zsbuf && + lp->depth_stencil->depth.enabled) { + key->zsbuf_format = lp->framebuffer.zsbuf->format; + memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + } key->alpha.enabled = lp->depth_stencil->alpha.enabled; if(key->alpha.enabled) -- cgit v1.2.3 From abc160b664c3fbd4c18a2cd3402c9a84f5f2d00f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 9 Sep 2009 21:17:20 +0100 Subject: llvmpipe: Fix depth mask computation. Fixes depth test for 24bit depth formats. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 2cd6e6b921..3f88a14b5d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder, padding_right = 0; for(chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size; + padding_left = format_desc->block.bits - + (padding_right + format_desc->channel[z_swizzle].size); if(padding_left || padding_right) { - const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1; - const long long mask_right = ((long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right); + const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; + const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; + z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); } if(padding_left) -- cgit v1.2.3 From 4139bc8f4375c1f8961b281b6303bccaab24367a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 9 Sep 2009 21:46:18 +0100 Subject: llvmpipe: Quick hack for 1D textures. --- src/gallium/drivers/llvmpipe/lp_bld_sample.h | 1 + src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 5798f191fe..6f565af76d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -53,6 +53,7 @@ struct lp_sampler_static_state { /* pipe_texture's state */ enum pipe_format format; + unsigned target:2; unsigned pot_width:1; unsigned pot_height:1; unsigned pot_depth:1; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 25c8d84501..bcc9e41c50 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -322,6 +322,9 @@ lp_build_sample_soa(LLVMBuilderRef builder, height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); + if(static_state->target == PIPE_TEXTURE_1D) + t = bld.coord_bld.zero; + if(static_state->normalized_coords) { LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); -- cgit v1.2.3 From bd3b59da632d85a062accab267e18b66274b857a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 10 Sep 2009 09:19:51 +0100 Subject: llvmpipe: Copy the texture target into the sampler static state. Hunk forgotten in previous commit. --- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index bcc9e41c50..6ebf107cc2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -61,6 +61,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, return; state->format = texture->format; + state->target = texture->target; state->pot_width = util_is_pot(texture->width[0]); state->pot_height = util_is_pot(texture->height[0]); state->pot_depth = util_is_pot(texture->depth[0]); -- cgit v1.2.3 From 8e6b925d2a963a2d5a403e106d7d25e3dcca0775 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 10 Sep 2009 11:44:03 +0100 Subject: llvmpipe: Proper control flow builders. New control flow helper functions which keep track of all variables and generate the correct Phi functions. This re-enables skipping the fs execution of quads masked out by the rasterizer, early z testing, and kill opcode. This yields a performance improvement of around 20%. --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 401 +++++++++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_bld_flow.h | 38 ++- src/gallium/drivers/llvmpipe/lp_state_fs.c | 47 +++- 3 files changed, 426 insertions(+), 60 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 9d99e1a9d9..1252593226 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -32,59 +32,261 @@ */ #include "util/u_debug.h" +#include "util/u_memory.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" +#define LP_BUILD_FLOW_MAX_VARIABLES 32 +#define LP_BUILD_FLOW_MAX_DEPTH 32 + + +/** + * Enumeration of all possible flow constructs. + */ +enum lp_build_flow_construct_kind { + lP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SKIP, +}; + + +/** + * Variable declaration scope. + */ +struct lp_build_flow_scope +{ + /** Number of variables declared in this scope */ + unsigned num_variables; +}; + + +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_flow_skip +{ + /** Block to skip to */ + LLVMBasicBlockRef block; + + /** Number of variables declared at the beginning */ + unsigned num_variables; + + LLVMValueRef *phi; +}; + + +/** + * Union of all possible flow constructs' data + */ +union lp_build_flow_construct_data +{ + struct lp_build_flow_scope scope; + struct lp_build_flow_skip skip; +}; + + +/** + * Element of the flow construct stack. + */ +struct lp_build_flow_construct +{ + enum lp_build_flow_construct_kind kind; + union lp_build_flow_construct_data data; +}; + + +/** + * All necessary data to generate LLVM control flow constructs. + * + * Besides keeping track of the control flow construct themselves we also + * need to keep track of variables in order to generate SSA Phi values. + */ +struct lp_build_flow_context +{ + LLVMBuilderRef builder; + + /** + * Control flow stack. + */ + struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; + unsigned num_constructs; + + /** + * Variable stack + */ + LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; + unsigned num_variables; +}; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder) +{ + struct lp_build_flow_context *flow; + + flow = CALLOC_STRUCT(lp_build_flow_context); + if(!flow) + return NULL; + + flow->builder = builder; + + return flow; +} + + void -lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, - union lp_type type, - LLVMValueRef value) +lp_build_flow_destroy(struct lp_build_flow_context *flow) { - memset(mask, 0, sizeof *mask); + assert(flow->num_constructs == 0); + assert(flow->num_variables == 0); + FREE(flow); +} - mask->builder = builder; - mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + +static union lp_build_flow_construct_data * +lp_build_flow_push(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); + if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) + return NULL; + + flow->constructs[flow->num_constructs].kind = kind; + return &flow->constructs[flow->num_constructs++].data; +} + + +static union lp_build_flow_construct_data * +lp_build_flow_peek(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[flow->num_constructs - 1].data; +} + + +static union lp_build_flow_construct_data * +lp_build_flow_pop(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[--flow->num_constructs].data; } +/** + * Begin a variable scope. + * + * + */ void -lp_build_mask_update(struct lp_build_mask_context *mask, - LLVMValueRef value) +lp_build_flow_scope_begin(struct lp_build_flow_context *flow) { + struct lp_build_flow_scope *scope; - LLVMValueRef cond; - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef new_block; + scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(mask->value) - mask->value = LLVMBuildAnd(mask->builder, mask->value, value, ""); - else - mask->value = value; + scope->num_variables = 0; +} - /* FIXME: disabled until we have proper control flow helpers */ -#if 0 - cond = LLVMBuildICmp(mask->builder, - LLVMIntEQ, - LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""), - LLVMConstNull(mask->reg_type), - ""); - current_block = LLVMGetInsertBlock(mask->builder); +/** + * Declare a variable. + * + * A variable is a named entity which can have different LLVMValueRef's at + * different points of the program. This is relevant for control flow because + * when there are mutiple branches to a same location we need to replace + * the variable's value with a Phi function as explained in + * http://en.wikipedia.org/wiki/Static_single_assignment_form . + * + * We keep track of variables by keeping around a pointer to where their + * current. + * + * There are a few cautions to observe: + * + * - Variable's value must not be NULL. If there is no initial value then + * LLVMGetUndef() should be used. + * + * - Variable's value must be kept up-to-date. If the variable is going to be + * modified by a function then a pointer should be passed so that its value + * is accurate. Failure to do this will cause some of the variables' + * transient values to be lost, leading to wrong results. + * + * - A program should be written from top to bottom, by always appending + * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or + * modifying existing statements will most likely lead to wrong results. + * + */ +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; + + assert(*variable); + if(!*variable) + return; + + assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); + if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) + return; + + flow->variables[flow->num_variables++] = variable; + ++scope->num_variables; +} - if(!mask->skip_block) { - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - mask->skip_block = LLVMAppendBasicBlock(function, "skip"); - mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), ""); +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; + + assert(flow->num_variables >= scope->num_variables); + if(flow->num_variables < scope->num_variables) { + flow->num_variables = 0; + return; } + flow->num_variables -= scope->num_variables; +} + + +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + current_block = LLVMGetInsertBlock(flow->builder); + next_block = LLVMGetNextBasicBlock(current_block); - assert(next_block); if(next_block) { new_block = LLVMInsertBasicBlock(next_block, ""); } @@ -93,30 +295,139 @@ lp_build_mask_update(struct lp_build_mask_context *mask, new_block = LLVMAppendBasicBlock(function, ""); } - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block); + return new_block; +} + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBuilderRef builder; + unsigned i; + + skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + skip->block = lp_build_flow_insert_block(flow); + skip->num_variables = flow->num_variables; + if(!skip->num_variables) { + skip->phi = NULL; + return; + } + + skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); + if(!skip->phi) { + skip->num_variables = 0; + return; + } + + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, skip->block); - LLVMPositionBuilderAtEnd(mask->builder, new_block); -#endif + for(i = 0; i < skip->num_variables; ++i) + skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + LLVMDisposeBuilder(builder); } -LLVMValueRef -lp_build_mask_end(struct lp_build_mask_context *mask) +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond) +{ + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef new_block; + unsigned i; + + skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + current_block = LLVMGetInsertBlock(flow->builder); + + new_block = lp_build_flow_insert_block(flow); + + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + } + + LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + + LLVMPositionBuilderAtEnd(flow->builder, new_block); + } + + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow) { - if(mask->skip_block) { - LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder); + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + unsigned i; - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildBr(mask->builder, mask->skip_block); + skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; - LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block); + current_block = LLVMGetInsertBlock(flow->builder); - mask->value = mask->phi; - mask->phi = NULL; - mask->skip_block = NULL; + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + *flow->variables[i] = skip->phi[i]; } + LLVMBuildBr(flow->builder, skip->block); + LLVMPositionBuilderAtEnd(flow->builder, skip->block); + + FREE(skip->phi); +} + + +void +lp_build_mask_begin(struct lp_build_mask_context *mask, + struct lp_build_flow_context *flow, + union lp_type type, + LLVMValueRef value) +{ + memset(mask, 0, sizeof *mask); + + mask->flow = flow; + mask->reg_type = LLVMIntType(type.width * type.length); + mask->value = value; + + lp_build_flow_scope_begin(flow); + lp_build_flow_scope_declare(flow, &mask->value); + lp_build_flow_skip_begin(flow); +} + + +void +lp_build_mask_update(struct lp_build_mask_context *mask, + LLVMValueRef value) +{ + LLVMBuilderRef builder = mask->flow->builder; + LLVMValueRef cond; + + mask->value = LLVMBuildAnd(builder, mask->value, value, ""); + + cond = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMConstNull(mask->reg_type), + ""); + + lp_build_flow_skip_cond_break(mask->flow, cond); +} + + +LLVMValueRef +lp_build_mask_end(struct lp_build_mask_context *mask) +{ + lp_build_flow_skip_end(mask->flow); + lp_build_flow_scope_end(mask->flow); return mask->value; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 1b634ff038..9d76e3064d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -41,23 +41,49 @@ union lp_type; +struct lp_build_flow_context; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder); + +void +lp_build_flow_destroy(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable); + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond); + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow); + + struct lp_build_mask_context { - LLVMBuilderRef builder; + struct lp_build_flow_context *flow; LLVMTypeRef reg_type; LLVMValueRef value; - - LLVMValueRef phi; - - LLVMBasicBlockRef skip_block; }; void lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, + struct lp_build_flow_context *flow, union lp_type type, LLVMValueRef value); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 21e675c34e..2e60da60cd 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -197,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z = interp->pos[2]; + struct lp_build_flow_context *flow; struct lp_build_mask_context mask; boolean early_depth_test; unsigned attrib; @@ -208,7 +209,33 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr = lp_jit_context_constants(builder, context_ptr); - lp_build_mask_begin(&mask, builder, type, *pmask); + flow = lp_build_flow_create(builder); + + memset(outputs, 0, sizeof outputs); + + lp_build_flow_scope_begin(flow); + + /* Declare the color and z variables */ + for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + boolean declare = FALSE; + switch (shader->info.output_semantic_name[attrib]) { + case TGSI_SEMANTIC_COLOR: + declare = TRUE; + break; + case TGSI_SEMANTIC_POSITION: + if(chan == 2) + declare = TRUE; + break; + } + if(declare) { + outputs[attrib][chan] = LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, &outputs[attrib][chan]); + } + } + } + + lp_build_mask_begin(&mask, flow, type, *pmask); early_depth_test = key->depth.enabled && @@ -221,12 +248,19 @@ generate_fs(struct llvmpipe_context *lp, type, &mask, z, depth_ptr); - memset(outputs, 0, sizeof outputs); - lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, outputs, sampler); + if(!early_depth_test) + generate_depth(builder, key, + type, &mask, + z, depth_ptr); + + lp_build_mask_end(&mask); + + lp_build_flow_scope_end(flow); + for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(outputs[attrib][chan]) { @@ -265,12 +299,7 @@ generate_fs(struct llvmpipe_context *lp, } } - if(!early_depth_test) - generate_depth(builder, key, - type, &mask, - z, depth_ptr); - - lp_build_mask_end(&mask); + lp_build_flow_destroy(flow); *pmask = mask.value; -- cgit v1.2.3 From c3c80c5c22f9ce7fabba90daa5d5142e5fb1c012 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 10 Sep 2009 12:01:42 +0100 Subject: llvmpipe: Skip blending when mask is zero. This increases quake3 timedemo fps another 10%. --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 31 +++++++++++++++++++----------- src/gallium/drivers/llvmpipe/lp_state_fs.c | 12 ++++++++++-- 2 files changed, 30 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 1252593226..69ed014ff3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -386,6 +386,22 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) } +static void +lp_build_mask_check(struct lp_build_mask_context *mask) +{ + LLVMBuilderRef builder = mask->flow->builder; + LLVMValueRef cond; + + cond = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMConstNull(mask->reg_type), + ""); + + lp_build_flow_skip_cond_break(mask->flow, cond); +} + + void lp_build_mask_begin(struct lp_build_mask_context *mask, struct lp_build_flow_context *flow, @@ -401,6 +417,8 @@ lp_build_mask_begin(struct lp_build_mask_context *mask, lp_build_flow_scope_begin(flow); lp_build_flow_scope_declare(flow, &mask->value); lp_build_flow_skip_begin(flow); + + lp_build_mask_check(mask); } @@ -408,18 +426,9 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) { - LLVMBuilderRef builder = mask->flow->builder; - LLVMValueRef cond; - - mask->value = LLVMBuildAnd(builder, mask->value, value, ""); - - cond = LLVMBuildICmp(builder, - LLVMIntEQ, - LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), - LLVMConstNull(mask->reg_type), - ""); + mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); - lp_build_flow_skip_cond_break(mask->flow, cond); + lp_build_mask_check(mask); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2e60da60cd..354d9916c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -319,6 +319,8 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef dst_ptr) { struct lp_build_context bld; + struct lp_build_flow_context *flow; + struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMTypeRef int_vec_type; LLVMValueRef const_ptr; @@ -327,11 +329,14 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef res[4]; unsigned chan; + lp_build_context_init(&bld, builder, type); + + flow = lp_build_flow_create(builder); + lp_build_mask_begin(&mask_ctx, flow, type, mask); + vec_type = lp_build_vec_type(type); int_vec_type = lp_build_int_vec_type(type); - lp_build_context_init(&bld, builder, type); - const_ptr = lp_jit_context_blend_color(builder, context_ptr); const_ptr = LLVMBuildBitCast(builder, const_ptr, LLVMPointerType(vec_type, 0), ""); @@ -354,6 +359,9 @@ generate_blend(const struct pipe_blend_state *blend, res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); } + + lp_build_mask_end(&mask_ctx); + lp_build_flow_destroy(flow); } -- cgit v1.2.3 From 48f19c0bcdc56d33ef2873eeabe635380764e968 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 10 Sep 2009 12:14:53 +0100 Subject: llvmpipe: Fix sampling from depth textures. Respect texture compare func. Fixes Mesa shadowtex sample. --- src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 53 ++++++---- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 118 +++++++++++++++++------ 2 files changed, 122 insertions(+), 49 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index 36bac06d2e..569e8d10a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -83,6 +83,30 @@ lp_build_gather(LLVMBuilderRef builder, } +static LLVMValueRef +lp_build_format_swizzle(union lp_type type, + const LLVMValueRef *inputs, + enum util_format_swizzle swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + return inputs[swizzle]; + case UTIL_FORMAT_SWIZZLE_0: + return lp_build_zero(type); + case UTIL_FORMAT_SWIZZLE_1: + return lp_build_one(type); + case UTIL_FORMAT_SWIZZLE_NONE: + return lp_build_undef(type); + default: + assert(0); + return lp_build_undef(type); + } +} + + void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, @@ -146,25 +170,16 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, start = stop; } - for (chan = 0; chan < 4; ++chan) { - enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - rgba[chan] = inputs[swizzle]; - break; - case UTIL_FORMAT_SWIZZLE_0: - rgba[chan] = lp_build_zero(type); - break; - case UTIL_FORMAT_SWIZZLE_1: - rgba[chan] = lp_build_one(type); - break; - case UTIL_FORMAT_SWIZZLE_NONE: - rgba[chan] = lp_build_undef(type); - break; + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); + rgba[2] = rgba[1] = rgba[0] = depth; + rgba[3] = lp_build_one(type); + } + else { + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 6ebf107cc2..2913b17d3f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -42,6 +42,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" +#include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" @@ -72,8 +73,10 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->min_img_filter = sampler->min_img_filter; state->min_mip_filter = sampler->min_mip_filter; state->mag_img_filter = sampler->mag_img_filter; - state->compare_mode = sampler->compare_mode; - state->compare_func = sampler->compare_func; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } state->normalized_coords = sampler->normalized_coords; state->prefilter = sampler->prefilter; } @@ -115,17 +118,52 @@ lp_build_sample_texel(struct lp_build_sample_context *bld, LLVMValueRef data_ptr, LLVMValueRef *texel) { + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef x_stride; - LLVMValueRef x_offset; - LLVMValueRef y_offset; LLVMValueRef offset; x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); - x_offset = lp_build_mul(&bld->int_coord_bld, x, x_stride); - y_offset = lp_build_mul(&bld->int_coord_bld, y, y_stride); + if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); + + x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); - offset = lp_build_add(&bld->int_coord_bld, x_offset, y_offset); + x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(int_coord_bld, x, x_stride); + y_offset = lp_build_mul(int_coord_bld, y, y_stride); + + offset = lp_build_add(int_coord_bld, x_offset, y_offset); + } lp_build_load_rgba_soa(bld->builder, bld->format_desc, @@ -249,30 +287,48 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, /* TODO: Don't interpolate missing channels */ for(chan = 0; chan < 4; ++chan) { - switch(bld->format_desc->swizzle[chan]) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - texel[chan] = lp_build_lerp_2d(&bld->texel_bld, - s_fpart, t_fpart, - neighbors[0][0][chan], - neighbors[0][1][chan], - neighbors[1][0][chan], - neighbors[1][1][chan]); - break; - case UTIL_FORMAT_SWIZZLE_0: - texel[chan] = bld->texel_bld.zero; - break; - case UTIL_FORMAT_SWIZZLE_1: - texel[chan] = bld->texel_bld.one; - break; - default: - assert(0); - texel[chan] = bld->texel_bld.undef; - break; - } + texel[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } +} + + +static void +lp_build_sample_compare(struct lp_build_sample_context *bld, + LLVMValueRef p, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + LLVMValueRef res; + unsigned chan; + + if(!bld->static_state->compare_mode) + return; + + /* TODO: Compare before swizzling, to avoid redundant computations */ + res = NULL; + for(chan = 0; chan < 4; ++chan) { + LLVMValueRef cmp; + cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); + cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); + + if(res) + res = lp_build_add(texel_bld, res, cmp); + else + res = cmp; } + + assert(res); + res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + for(chan = 0; chan < 3; ++chan) + texel[chan] = res; + texel[3] = texel_bld->one; } @@ -343,4 +399,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; } + + lp_build_sample_compare(&bld, p, texel); } -- cgit v1.2.3 From 4c3a48ad0cb36e6d8601535b91f83caed0d07570 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 10 Sep 2009 12:37:44 +0100 Subject: llvmpipe: Mask out color channels not present in the color buffer. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 354d9916c7..ccbccc813f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -354,10 +354,12 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_blend_soa(builder, blend, type, src, dst, con, res); for(chan = 0; chan < 4; ++chan) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); - lp_build_name(res[chan], "res.%c", "rgba"[chan]); - res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); - LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + if(blend->colormask & (1 << chan)) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); + lp_build_name(res[chan], "res.%c", "rgba"[chan]); + res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); + LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + } } lp_build_mask_end(&mask_ctx); @@ -725,7 +727,23 @@ make_variant_key(struct llvmpipe_context *lp, key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ - memcpy(&key->blend, lp->blend, sizeof key->blend); + if(lp->framebuffer.cbufs[0]) { + const struct util_format_description *format_desc; + unsigned chan; + + memcpy(&key->blend, lp->blend, sizeof key->blend); + + format_desc = util_format_description(lp->framebuffer.cbufs[0]->format); + assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); + + /* mask out color channels not present in the color buffer */ + for(chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + if(swizzle > 4) + key->blend.colormask &= ~(1 << chan); + } + } for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) -- cgit v1.2.3 From 6a405b4a21ac1fa45a93da37ce6b95d98d17f0e2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 10 Sep 2009 13:35:39 +0100 Subject: llvmpipe: Fix alpha test. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 39 ++++++++++-------------------- 1 file changed, 13 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ccbccc813f..618cf1ffb8 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -216,24 +216,11 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_begin(flow); /* Declare the color and z variables */ - for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - boolean declare = FALSE; - switch (shader->info.output_semantic_name[attrib]) { - case TGSI_SEMANTIC_COLOR: - declare = TRUE; - break; - case TGSI_SEMANTIC_POSITION: - if(chan == 2) - declare = TRUE; - break; - } - if(declare) { - outputs[attrib][chan] = LLVMGetUndef(vec_type); - lp_build_flow_scope_declare(flow, &outputs[attrib][chan]); - } - } + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + color[chan] = LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, &color[chan]); } + lp_build_flow_scope_declare(flow, &z); lp_build_mask_begin(&mask, flow, type, *pmask); @@ -252,15 +239,6 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr, interp->pos, interp->inputs, outputs, sampler); - if(!early_depth_test) - generate_depth(builder, key, - type, &mask, - z, depth_ptr); - - lp_build_mask_end(&mask); - - lp_build_flow_scope_end(flow); - for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(outputs[attrib][chan]) { @@ -299,6 +277,15 @@ generate_fs(struct llvmpipe_context *lp, } } + if(!early_depth_test) + generate_depth(builder, key, + type, &mask, + z, depth_ptr); + + lp_build_mask_end(&mask); + + lp_build_flow_scope_end(flow); + lp_build_flow_destroy(flow); *pmask = mask.value; -- cgit v1.2.3 From 1fc41002252419f4688c24ea8c3814553b3d76ad Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Sep 2009 11:24:00 +0100 Subject: llvmpipe: Update status in README and TODO/FIXME comments throughout the code. --- src/gallium/drivers/llvmpipe/README | 23 +++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_bld_conv.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 1 + src/gallium/drivers/llvmpipe/lp_bld_logic.c | 11 ++++------- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 7 +++++++ src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 19 ++++++------------- src/gallium/drivers/llvmpipe/lp_context.c | 2 -- src/gallium/drivers/llvmpipe/lp_jit.h | 3 +-- src/gallium/drivers/llvmpipe/lp_setup.c | 2 -- 9 files changed, 36 insertions(+), 36 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 498d21dea6..89d08834a3 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -8,13 +8,16 @@ Done so far is: - the whole fragment pipeline is code generated in a single function + - input interpolation + - depth testing + - texture sampling (not all state/formats are supported) + - fragment shader TGSI translation - same level of support as the TGSI SSE2 exec machine, with the exception we don't fallback to TGSI interpretation when an unsupported opcode is found, but just ignore it - - texture sampling via an intrinsic call - done in SoA layout - input interpolation also code generated @@ -28,16 +31,17 @@ Done so far is: any width and length - not all operations are implemented for these types yet though -Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch, -which includes hand written fast implementations for common cases. +Most mesa/progs/demos/* work. To do (probably by this order): - code generate stipple and stencil testing - - code generate texture sampling + - translate the remaining bits of texture sampling state - translate TGSI control flow instructions, and all other remaining opcodes + + - integrate with the draw module for VS code generation - code generate the triangle setup and rasterization @@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as make linux-llvm -but the rest of these instructions assume scons is used. +but the rest of these instructions assume that scons is used. Using @@ -108,6 +112,9 @@ or export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH +For performance evaluation pass debug=no to scons, and use the corresponding +lib directory without the "-debug" suffix. + Unit testing ============ @@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe: - lp_test_conv: SIMD vector conversion - lp_test_format: pixel unpacking/packing -Some of this tests can output results and benchmarks to a tab-seperated-file +Some of this tests can output results and benchmarks to a tab-separated-file for posterior analysis, e.g.: build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv @@ -133,10 +140,10 @@ Development Notes at the top of the lp_bld_*.c functions. - All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be - put in a standalone Gallium state -> LLVM IR translation module. + put in a stand-alone Gallium state -> LLVM IR translation module. - We use LLVM-C bindings for now. They are not documented, but follow the C++ interfaces very closely, and appear to be complete enough for code generation. See http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - for a standalone example. + for a stand-alone example. diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index c8954c8a34..cd01496d54 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, int shift = dst_width - n; res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); - /* Fill in the empty lower bits for added precision? */ + /* TODO: Fill in the empty lower bits for additional precision? */ #if 0 { LLVMValueRef msb; @@ -244,7 +244,7 @@ lp_build_const_pack_shuffle(unsigned n) * Expand the bit width. * * This will only change the number of bits the values are represented, not the - * values themselved. + * values themselves. */ static void lp_build_expand(LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 3f88a14b5d..e5fe81193f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -211,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder, LLVMBuildStore(builder, dst, dst_ptr); } + /* FIXME */ assert(!state->occlusion_count); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 8631efd6c3..d566780e5d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld, b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); } - /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */ - a = LLVMBuildAnd(bld->builder, a, mask, ""); /* This often gets translated to PANDN, but sometimes the NOT is @@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld, return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); } + else { #if 0 - else if(0) { - /* FIXME: Unfortunately select of vectors do not work */ + /* XXX: Unfortunately select of vectors do not work */ /* Use a select */ LLVMTypeRef elem_type = LLVMInt1Type(); LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; @@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld, cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); - } -#endif - else { +#else LLVMValueRef mask = lp_build_const_mask_aos(type, cond); return lp_build_select(bld, mask, a, b); +#endif } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 2913b17d3f..3ca25b0e76 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -207,6 +207,7 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + /* FIXME */ default: assert(0); } @@ -398,7 +399,13 @@ lp_build_sample_soa(LLVMBuilderRef builder, case PIPE_TEX_FILTER_ANISO: lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; + default: + assert(0); } + /* FIXME: respect static_state->min_mip_filter */; + /* FIXME: respect static_state->mag_img_filter */; + /* FIXME: respect static_state->prefilter */; + lp_build_sample_compare(&bld, p, texel); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 3ce379de12..4756811dc3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -167,6 +167,7 @@ emit_fetch( break; case TGSI_UTIL_SIGN_SET: + /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); res = LLVMBuildNeg( bld->base.builder, res, "" ); break; @@ -349,14 +350,6 @@ emit_kil( } -static void -emit_kilp( - struct lp_build_tgsi_soa_context *bld ) -{ - /* XXX todo / fix me */ -} - - /** * Check if inst src/dest regs use indirect addressing into temporary * register file. @@ -398,6 +391,7 @@ emit_instruction( switch (inst->Instruction.Opcode) { #if 0 case TGSI_OPCODE_ARL: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_flr(bld, 0, 0); @@ -686,6 +680,7 @@ emit_instruction( break; case TGSI_OPCODE_CND: + /* FIXME */ return 0; break; @@ -849,13 +844,11 @@ emit_instruction( return 0; break; -#if 0 case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( bld ); - return 0; /* XXX fix me */ + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_KIL: /* conditional kill */ @@ -1309,7 +1302,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - /* Input already interpolated */ + /* Inputs already interpolated */ break; case TGSI_TOKEN_TYPE_INSTRUCTION: diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 233d1df0e1..a4b2bd8c2a 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -141,8 +141,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, return PIPE_REFERENCED_FOR_WRITE; } - /* FIXME: we also need to do the same for the texture cache */ - return PIPE_UNREFERENCED; } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index c3e3e1af67..58f716ede2 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -80,10 +80,9 @@ struct lp_jit_context struct tgsi_sampler **samplers; - /* TODO: alpha reference value */ float alpha_ref_value; - /* TODO: blend constant color */ + /* FIXME: store (also?) in floats */ uint8_t *blend_color; struct lp_jit_texture textures[PIPE_MAX_SAMPLERS]; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index f9e254efca..2d2fc19a65 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -163,8 +163,6 @@ shade_quads(struct llvmpipe_context *llvmpipe, else depth = NULL; - /* TODO: blend color */ - /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(mask, 16)); -- cgit v1.2.3 From 672c5f52d1f97ed20d9f382b33c13919ec811684 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Sep 2009 11:29:24 +0100 Subject: llvmpipe: set dirty_render_cache in llvmpipe_clear() Based on Brian's softpipe change on commit 988db641195819c948249a1bb2d59f13577a482f. We don't use the tile cache for zsbuf though, only for color buffers. --- src/gallium/drivers/llvmpipe/lp_clear.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 580cca5b46..bdcff94b9b 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, util_pack_color(rgba, ps->format, &cv); lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv); } + llvmpipe->dirty_render_cache = TRUE; } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { -- cgit v1.2.3 From d81086a86bd7c82eae5a8f0d1092a30c23626257 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Sep 2009 13:39:14 -0600 Subject: llvmpipe: asst fixes for 'make linux-llvmpipe' --- src/gallium/drivers/llvmpipe/Makefile | 2 ++ src/gallium/winsys/xlib/Makefile | 1 + src/gallium/winsys/xlib/xlib_llvmpipe.c | 3 +++ 3 files changed, 6 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 6e63a0c2b7..5ac09de79e 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -3,6 +3,8 @@ include $(TOP)/configs/current LIBNAME = llvmpipe +CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS + C_SOURCES = \ lp_bld_alpha.c \ lp_bld_arit.c \ diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index 522f6dc5ae..3a1945d92c 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -34,6 +34,7 @@ XLIB_WINSYS_SOURCES = \ xlib_brw_aub.c \ xlib_brw_context.c \ xlib_brw_screen.c \ + xlib_llvmpipe.c \ xlib_softpipe.c \ xlib_trace.c diff --git a/src/gallium/winsys/xlib/xlib_llvmpipe.c b/src/gallium/winsys/xlib/xlib_llvmpipe.c index bc876591c0..3dd15e099b 100644 --- a/src/gallium/winsys/xlib/xlib_llvmpipe.c +++ b/src/gallium/winsys/xlib/xlib_llvmpipe.c @@ -33,6 +33,8 @@ */ +#if defined(GALLIUM_LLVMPIPE) + #include "xm_api.h" #undef ASSERT @@ -459,3 +461,4 @@ struct xm_driver xlib_llvmpipe_driver = +#endif /* GALLIUM_LLVMPIPE */ -- cgit v1.2.3 From d7aa114e166c5f5330ecbe321adad65ad2cd54aa Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Sep 2009 13:45:48 +0100 Subject: llvmpipe: Rename function to free up lp_build_trunc to the usual arithmetic meaning. --- src/gallium/drivers/llvmpipe/lp_bld_conv.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index cd01496d54..c5a71d2c72 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder, * TODO: Handle saturation consistently. */ static LLVMValueRef -lp_build_trunc(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs) +lp_build_pack(LLVMBuilderRef builder, + union lp_type src_type, + union lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs) { LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned i; @@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder, if(tmp_type.width > dst_type.width) { assert(num_dsts == 1); - tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); + tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = 1; @@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder, if(src_type.width > dst_type.width) { assert(num_dsts == 1); - dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs); + dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); } else if(src_type.width < dst_type.width) { assert(num_srcs == 1); -- cgit v1.2.3 From 00dd0156e08d2801aa2bc5454f94692bf65a33a6 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Sep 2009 13:50:19 +0100 Subject: llvmpipe: Add a few more common arithmetic functions. We are relying on SSE4.1 for round/trunc/ceil/floor. We'll need to eventually find alternatives for the rest of the world. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 95 ++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_arit.h | 16 +++++ 2 files changed, 111 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index be7442d00a..ce3e5f91c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -623,6 +623,47 @@ lp_build_abs(struct lp_build_context *bld, } +LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef cond; + LLVMValueRef res; + + /* Handle non-zero case */ + if(!type.sign) { + /* if not zero then sign must be positive */ + res = bld->one; + } + else if(type.floating) { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef one; + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + one = LLVMConstBitCast(bld->one, int_vec_type); + res = LLVMBuildOr(bld->builder, sign, one, ""); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + else + { + LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); + res = lp_build_select(bld, cond, bld->one, minus_one); + } + + /* Handle zero */ + cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); + res = lp_build_select(bld, cond, bld->zero, bld->one); + + return res; +} + + enum lp_build_round_sse41_mode { LP_BUILD_ROUND_SSE41_NEAREST = 0, @@ -661,6 +702,24 @@ lp_build_round_sse41(struct lp_build_context *bld, } +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + LLVMValueRef lp_build_floor(struct lp_build_context *bld, LLVMValueRef a) @@ -679,6 +738,42 @@ lp_build_floor(struct lp_build_context *bld, } +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + /** * Convert to integer, through whichever rounding method that's fastest, * typically truncating to zero. diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 383c3c3313..5e083b847f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -105,10 +105,26 @@ LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); +LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a); + LLVMValueRef lp_build_floor(struct lp_build_context *bld, LLVMValueRef a); +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a); + LLVMValueRef lp_build_int(struct lp_build_context *bld, LLVMValueRef a); -- cgit v1.2.3 From 873773ee2b034e8df72ddfacc764915b8a76ebe2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Sep 2009 13:55:08 +0100 Subject: llvmpipe: Translate more TGSI opcodes. Basically cover all low hanging fruit, and mark the still missing opcodes as "fixme" or deprecated. --- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 139 +++++++++++++++++++++---- 1 file changed, 116 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 4756811dc3..c6488d073c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -680,8 +680,15 @@ emit_instruction( break; case TGSI_OPCODE_CND: - /* FIXME */ - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp1 = lp_build_const_scalar(bld->base.type, 0.5); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); + dst0 = lp_build_select( &bld->base, tmp0, src0, src1 ); + emit_store( bld, inst, 0, chan_index, dst0); + } break; case TGSI_OPCODE_DP2A: @@ -699,23 +706,30 @@ emit_instruction( } break; -#if 0 case TGSI_OPCODE_FRC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_frc( bld, 0, 0 ); + src0 = emit_fetch( bld, inst, 0, chan_index ); + tmp0 = lp_build_floor(&bld->base, src0); + tmp0 = lp_build_sub(&bld->base, tmp0, src0); emit_store( bld, inst, 0, chan_index, tmp0); } break; case TGSI_OPCODE_CLAMP: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp0 = lp_build_max(&bld->base, tmp0, src1); + tmp0 = lp_build_min(&bld->base, tmp0, src2); + emit_store( bld, inst, 0, chan_index, tmp0); + } break; case TGSI_OPCODE_FLR: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr( bld, 0, 0 ); + tmp0 = lp_build_floor(&bld->base, tmp0); emit_store( bld, inst, 0, chan_index, tmp0); } break; @@ -723,11 +737,10 @@ emit_instruction( case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); + tmp0 = lp_build_round(&bld->base, tmp0); emit_store( bld, inst, 0, chan_index, tmp0); } break; -#endif case TGSI_OPCODE_EX2: { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); @@ -806,8 +819,9 @@ emit_instruction( break; case TGSI_OPCODE_RCC: + /* deprecated? */ + assert(0); return 0; - break; case TGSI_OPCODE_DPH: tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); @@ -837,10 +851,12 @@ emit_instruction( break; case TGSI_OPCODE_DDX: + /* FIXME */ return 0; break; case TGSI_OPCODE_DDY: + /* FIXME */ return 0; break; @@ -886,7 +902,10 @@ emit_instruction( break; case TGSI_OPCODE_SFL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0 = bld->base.zero; + emit_store( bld, inst, 0, chan_index, dst0); + } break; case TGSI_OPCODE_SGT: @@ -928,7 +947,10 @@ emit_instruction( break; case TGSI_OPCODE_STR: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0 = bld->base.one; + emit_store( bld, inst, 0, chan_index, dst0); + } break; case TGSI_OPCODE_TEX: @@ -936,35 +958,49 @@ emit_instruction( break; case TGSI_OPCODE_TXD: + /* FIXME */ return 0; break; case TGSI_OPCODE_UP2H: + /* deprecated */ + assert (0); return 0; break; case TGSI_OPCODE_UP2US: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4B: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4UB: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_X2D: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_ARA: + /* deprecated */ + assert(0); return 0; break; #if 0 case TGSI_OPCODE_ARR: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_rnd( bld, 0, 0 ); @@ -975,32 +1011,33 @@ emit_instruction( #endif case TGSI_OPCODE_BRA: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_CAL: + /* FIXME */ return 0; break; -#if 0 case TGSI_OPCODE_RET: - emit_ret( bld ); + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_END: break; -#if 0 case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_sgn( bld, 0, 0 ); + tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); + tmp0 = lp_build_sgn( &bld->base, tmp0 ); emit_store( bld, inst, 0, chan_index, tmp0); } break; -#endif case TGSI_OPCODE_CMP: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1126,6 +1163,8 @@ emit_instruction( break; case TGSI_OPCODE_DIV: + /* deprecated */ + assert( 0 ); return 0; break; @@ -1151,105 +1190,146 @@ emit_instruction( break; case TGSI_OPCODE_BRK: + /* FIXME */ return 0; break; case TGSI_OPCODE_IF: + /* FIXME */ return 0; break; case TGSI_OPCODE_BGNFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_REP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ELSE: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDIF: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ENDREP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_PUSHA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_POPA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CEIL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + tmp0 = lp_build_ceil(&bld->base, tmp0); + emit_store( bld, inst, 0, chan_index, tmp0); + } break; case TGSI_OPCODE_I2F: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_NOT: + /* deprecated? */ + assert(0); return 0; break; -#if 0 case TGSI_OPCODE_TRUNC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_f2it( bld, 0 ); - emit_i2f( bld, 0 ); + tmp0 = lp_build_trunc(&bld->base, tmp0); emit_store( bld, inst, 0, chan_index, tmp0); } break; -#endif case TGSI_OPCODE_SHL: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SHR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_AND: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_OR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_MOD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_XOR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SAD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXF: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXQ: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CONT: + /* deprecated? */ + assert(0); return 0; break; @@ -1261,6 +1341,19 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = bld->base.zero; + emit_store( bld, inst, 0, chan_index, tmp0); + } + break; + + case TGSI_OPCODE_NOP: + break; + default: return 0; } -- cgit v1.2.3 From faec23387e035bcdd413b7364933d36a8ec22dba Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Sep 2009 14:42:52 +0100 Subject: llvmpipe: Delay storing into the dst register to prevent clobbering the src registers. How I'm thankful for regular expressions -- just a couple of them were all that was needed to do this otherwise tiresome and bug prone change. --- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 237 +++++++++++-------------- 1 file changed, 103 insertions(+), 134 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index c6488d073c..98d31f122f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -243,13 +243,13 @@ static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, boolean apply_lodbias, - boolean projected) + boolean projected, + LLVMValueRef *texel) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; LLVMValueRef lodbias; LLVMValueRef oow; LLVMValueRef coords[3]; - LLVMValueRef texel[4]; unsigned num_coords; unsigned i; @@ -294,10 +294,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, bld->base.type, unit, num_coords, coords, lodbias, texel); - - FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) { - emit_store( bld, inst, 0, i, texel[i] ); - } } @@ -377,17 +373,26 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) static int emit_instruction( struct lp_build_tgsi_soa_context *bld, - struct tgsi_full_instruction *inst ) + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info) { unsigned chan_index; LLVMValueRef src0, src1, src2; LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - LLVMValueRef dst0; + LLVMValueRef res; + LLVMValueRef dst0[NUM_CHANNELS]; /* we can't handle indirect addressing into temp register file yet */ if (indirect_temp_reference(inst)) return FALSE; + assert(info->num_dst <= 1); + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.undef; + } + } + switch (inst->Instruction.Opcode) { #if 0 case TGSI_OPCODE_ARL: @@ -396,7 +401,7 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_flr(bld, 0, 0); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif @@ -404,19 +409,17 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); } break; case TGSI_OPCODE_LIT: if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { - emit_store( bld, inst, 0, CHAN_X, bld->base.one); + dst0[CHAN_X] = bld->base.one; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_max( &bld->base, src0, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Y, dst0); + dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { /* XMM[1] = SrcReg[0].yyyy */ @@ -428,20 +431,19 @@ emit_instruction( tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); - dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Z, dst0); + dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { - emit_store( bld, inst, 0, CHAN_W, bld->base.one); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_rcp(&bld->base, src0); + res = lp_build_rcp(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -449,9 +451,9 @@ emit_instruction( /* TGSI_OPCODE_RECIPSQRT */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); src0 = lp_build_abs(&bld->base, src0); - dst0 = lp_build_rsqrt(&bld->base, src0); + res = lp_build_rsqrt(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -475,16 +477,15 @@ emit_instruction( lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = tmp1; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -510,20 +511,18 @@ emit_instruction( /* dst.x = floor(lg2(abs(src.x))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { - tmp1 = lp_build_div( &bld->base, src0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); } /* dst.z = lg2(abs(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -531,8 +530,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_mul(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); } break; @@ -540,8 +538,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_add(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_add(&bld->base, src0, src1); } break; @@ -559,7 +556,7 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -581,28 +578,24 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_DST: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = bld->base.one; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = emit_fetch( bld, inst, 0, CHAN_Z ); - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = emit_fetch( bld, inst, 1, CHAN_W ); - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); } break; @@ -610,8 +603,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_min( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); } break; @@ -619,8 +611,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_max( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); } break; @@ -630,8 +621,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -641,8 +631,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -654,7 +643,7 @@ emit_instruction( tmp2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); tmp0 = lp_build_add( &bld->base, tmp0, tmp2); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -662,8 +651,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); tmp1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_sub( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); } break; @@ -674,8 +662,7 @@ emit_instruction( src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_sub( &bld->base, src1, src2 ); tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); - dst0 = lp_build_add( &bld->base, tmp0, src2 ); - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); } break; @@ -686,8 +673,7 @@ emit_instruction( src2 = emit_fetch( bld, inst, 2, chan_index ); tmp1 = lp_build_const_scalar(bld->base.type, 0.5); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); - dst0 = lp_build_select( &bld->base, tmp0, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); } break; @@ -702,7 +688,7 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; @@ -711,7 +697,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); tmp0 = lp_build_floor(&bld->base, src0); tmp0 = lp_build_sub(&bld->base, tmp0, src0); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -722,23 +708,21 @@ emit_instruction( src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_max(&bld->base, tmp0, src1); tmp0 = lp_build_min(&bld->base, tmp0, src2); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_FLR: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_floor(&bld->base, tmp0); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_floor(&bld->base, tmp0); } break; case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_round(&bld->base, tmp0); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_round(&bld->base, tmp0); } break; @@ -746,7 +730,7 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_exp2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; } @@ -755,16 +739,16 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_log2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_POW: src0 = emit_fetch( bld, inst, 0, CHAN_X ); src1 = emit_fetch( bld, inst, 1, CHAN_X ); - dst0 = lp_build_pow( &bld->base, src0, src1 ); + res = lp_build_pow( &bld->base, src0, src1 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -785,7 +769,7 @@ emit_instruction( tmp5 = tmp3; tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); - emit_store( bld, inst, 0, CHAN_X, tmp2); + dst0[CHAN_X] = tmp2; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { @@ -796,25 +780,23 @@ emit_instruction( tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp3); + dst0[CHAN_Y] = tmp3; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); - emit_store( bld, inst, 0, CHAN_Z, tmp5); + dst0[CHAN_Z] = tmp5; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_ABS: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_abs( &bld->base, tmp0 ) ; - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); } break; @@ -838,7 +820,7 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -846,7 +828,7 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_cos( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -896,15 +878,13 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_SFL: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0 = bld->base.zero; - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = bld->base.zero; } break; @@ -913,8 +893,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -922,7 +901,7 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_sin( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -931,8 +910,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -941,20 +919,18 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_STR: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0 = bld->base.one; - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = bld->base.one; } break; case TGSI_OPCODE_TEX: - emit_tex( bld, inst, FALSE, FALSE ); + emit_tex( bld, inst, FALSE, FALSE, dst0 ); break; case TGSI_OPCODE_TXD: @@ -1005,7 +981,7 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_rnd( bld, 0, 0 ); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif @@ -1033,9 +1009,7 @@ emit_instruction( /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_sgn( &bld->base, tmp0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); } break; @@ -1045,34 +1019,29 @@ emit_instruction( src1 = emit_fetch( bld, inst, 1, chan_index ); src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); - dst0 = lp_build_select( &bld->base, tmp0, src1, src2); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); } break; case TGSI_OPCODE_SCS: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_cos( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_sin( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = bld->base.zero; - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = bld->base.zero; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_NRM: @@ -1131,33 +1100,28 @@ emit_instruction( /* dst.x = xmm1 * src.x */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { - tmp4 = lp_build_mul( &bld->base, tmp4, tmp1); - emit_store(bld, inst, 0, CHAN_X, tmp4); + dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); } /* dst.y = xmm1 * src.y */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { - tmp5 = lp_build_mul( &bld->base, tmp5, tmp1); - emit_store(bld, inst, 0, CHAN_Y, tmp5); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); } /* dst.z = xmm1 * src.z */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { - tmp6 = lp_build_mul( &bld->base, tmp6, tmp1); - emit_store(bld, inst, 0, CHAN_Z, tmp6); + dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); } /* dst.w = xmm1 * src.w */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { - tmp7 = lp_build_mul( &bld->base, tmp7, tmp1); - emit_store(bld, inst, 0, CHAN_W, tmp7); + dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); } } - /* dst0.w = 1.0 */ + /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { - tmp0 = bld->base.one; - emit_store(bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } } break; @@ -1177,16 +1141,16 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_TXP: - emit_tex( bld, inst, FALSE, TRUE ); + emit_tex( bld, inst, FALSE, TRUE, dst0 ); break; case TGSI_OPCODE_BRK: @@ -1248,8 +1212,7 @@ emit_instruction( case TGSI_OPCODE_CEIL: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_ceil(&bld->base, tmp0); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); } break; @@ -1268,8 +1231,7 @@ emit_instruction( case TGSI_OPCODE_TRUNC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_trunc(&bld->base, tmp0); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); } break; @@ -1346,8 +1308,7 @@ emit_instruction( case TGSI_OPCODE_NOISE3: case TGSI_OPCODE_NOISE4: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = bld->base.zero; - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = bld->base.zero; } break; @@ -1358,6 +1319,12 @@ emit_instruction( return 0; } + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_store( bld, inst, 0, chan_index, dst0[chan_index]); + } + } + return 1; } @@ -1399,12 +1366,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) { + { unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? info->mnemonic : ""); - } + if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + info ? info->mnemonic : ""); + } + break; case TGSI_TOKEN_TYPE_IMMEDIATE: -- cgit v1.2.3 From 5e13e987da6ce656b08f6c25f8d373c80949e3b0 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Sep 2009 16:12:48 +0100 Subject: llvmpipe: Use const keyword for input array arguments. --- src/gallium/drivers/llvmpipe/lp_bld_logic.c | 2 +- src/gallium/drivers/llvmpipe/lp_bld_logic.h | 2 +- src/gallium/drivers/llvmpipe/lp_bld_swizzle.c | 15 ++++++++------- src/gallium/drivers/llvmpipe/lp_bld_swizzle.h | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index d566780e5d..995a69c0f4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -337,7 +337,7 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]) + const boolean cond[4]) { const union lp_type type = bld->type; const unsigned n = type.length; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index 29b9e1c45b..9099e0fb5b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -66,7 +66,7 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]); + const boolean cond[4]); #endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c index ac7eed9379..f35638be44 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c @@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -192,7 +192,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld, return lp_build_swizzle1_aos(bld, a, swizzle); if(a == b) { - swizzle[0] %= 4; - swizzle[1] %= 4; - swizzle[2] %= 4; - swizzle[3] %= 4; - return lp_build_swizzle1_aos(bld, a, swizzle); + unsigned char swizzle1[4]; + swizzle1[0] = swizzle[0] % 4; + swizzle1[1] = swizzle[1] % 4; + swizzle1[2] = swizzle[2] % 4; + swizzle1[3] = swizzle[3] % 4; + return lp_build_swizzle1_aos(bld, a, swizzle1); } if(swizzle[0] % 4 == 0 && diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index d7dd6a8a60..cb0b6707ec 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); /** @@ -85,7 +85,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); #endif /* !LP_BLD_SWIZZLE_H */ -- cgit v1.2.3 From 4b32dd30072b5889ca1efcd5ac8bdbf14b1e2bb5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Sep 2009 16:13:12 +0100 Subject: llvmpipe: Remove dead references to pipe_winsys. --- src/gallium/drivers/llvmpipe/lp_screen.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 125035771e..0ce1a37bd4 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,8 +27,6 @@ #include "util/u_memory.h" -#include "util/u_simple_screen.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -196,8 +194,7 @@ static void llvmpipe_destroy_screen( struct pipe_screen *_screen ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - - struct pipe_winsys *winsys = _screen->winsys; + struct llvmpipe_winsys *winsys = screen->winsys; lp_jit_screen_cleanup(screen); -- cgit v1.2.3 From 86226d5ea186d3fc6013bc40a341e0c0a891de39 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Sep 2009 16:22:27 +0100 Subject: llvmpipe: Compute derivatives. --- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 91 ++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 98d31f122f..b106ce2317 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -78,6 +78,11 @@ #define CHAN_Z 2 #define CHAN_W 3 +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + struct lp_build_tgsi_soa_context { @@ -97,6 +102,51 @@ struct lp_build_tgsi_soa_context }; +static const unsigned char +swizzle_left[4] = { + QUAD_TOP_LEFT, QUAD_TOP_LEFT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT +}; + +static const unsigned char +swizzle_right[4] = { + QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, + QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT +}; + +static const unsigned char +swizzle_top[4] = { + QUAD_TOP_LEFT, QUAD_TOP_RIGHT, + QUAD_TOP_LEFT, QUAD_TOP_RIGHT +}; + +static const unsigned char +swizzle_bottom[4] = { + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT +}; + + +static LLVMValueRef +emit_ddx(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); + LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); + return lp_build_sub(&bld->base, src_right, src_left); +} + + +static LLVMValueRef +emit_ddy(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); + LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); + return lp_build_sub(&bld->base, src_top, src_bottom); +} + + /** * Register fetch. */ @@ -184,6 +234,36 @@ emit_fetch( } +/** + * Register fetch with derivatives. + */ +static void +emit_fetch_deriv( + struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst, + unsigned index, + const unsigned chan_index, + LLVMValueRef *res, + LLVMValueRef *ddx, + LLVMValueRef *ddy) +{ + LLVMValueRef src; + + src = emit_fetch(bld, inst, index, chan_index); + + if(res) + *res = src; + + /* TODO: use interpolation coeffs for inputs */ + + if(ddx) + *ddx = emit_ddx(bld, src); + + if(ddy) + *ddy = emit_ddy(bld, src); +} + + /** * Register store. */ @@ -239,6 +319,7 @@ emit_store( * High-level instruction translators. */ + static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, @@ -833,13 +914,15 @@ emit_instruction( break; case TGSI_OPCODE_DDX: - /* FIXME */ - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); + } break; case TGSI_OPCODE_DDY: - /* FIXME */ - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); + } break; case TGSI_OPCODE_KILP: -- cgit v1.2.3 From b4835ea03d64261da5a892f9590c9977b06920e8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 14 Sep 2009 11:05:06 +0100 Subject: llvmpipe: Make lp_type a regular union. Union not worth the hassle of violating C99 or adding a name to the structure. --- src/gallium/drivers/llvmpipe/lp_bld_alpha.c | 2 +- src/gallium/drivers/llvmpipe/lp_bld_alpha.h | 4 +- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 46 ++++---- src/gallium/drivers/llvmpipe/lp_bld_arit.h | 2 +- src/gallium/drivers/llvmpipe/lp_bld_blend.h | 6 +- src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 2 +- src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c | 2 +- src/gallium/drivers/llvmpipe/lp_bld_const.c | 30 +++--- src/gallium/drivers/llvmpipe/lp_bld_const.h | 32 +++--- src/gallium/drivers/llvmpipe/lp_bld_conv.c | 32 +++--- src/gallium/drivers/llvmpipe/lp_bld_conv.h | 14 +-- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 8 +- src/gallium/drivers/llvmpipe/lp_bld_depth.h | 6 +- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 2 +- src/gallium/drivers/llvmpipe/lp_bld_flow.h | 4 +- src/gallium/drivers/llvmpipe/lp_bld_format.h | 6 +- src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 6 +- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 2 +- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 2 +- src/gallium/drivers/llvmpipe/lp_bld_logic.c | 6 +- src/gallium/drivers/llvmpipe/lp_bld_logic.h | 2 +- src/gallium/drivers/llvmpipe/lp_bld_sample.h | 4 +- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 8 +- src/gallium/drivers/llvmpipe/lp_bld_swizzle.c | 6 +- src/gallium/drivers/llvmpipe/lp_bld_swizzle.h | 2 +- src/gallium/drivers/llvmpipe/lp_bld_tgsi.h | 6 +- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 2 +- src/gallium/drivers/llvmpipe/lp_bld_type.c | 25 ++--- src/gallium/drivers/llvmpipe/lp_bld_type.h | 123 +++++++++++----------- src/gallium/drivers/llvmpipe/lp_state_fs.c | 16 +-- src/gallium/drivers/llvmpipe/lp_test.h | 20 ++-- src/gallium/drivers/llvmpipe/lp_test_blend.c | 18 ++-- src/gallium/drivers/llvmpipe/lp_test_conv.c | 72 ++++++------- src/gallium/drivers/llvmpipe/lp_test_main.c | 20 ++-- src/gallium/drivers/llvmpipe/lp_tex_sample_c.c | 2 +- src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c | 2 +- 36 files changed, 270 insertions(+), 272 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index 49c2f911af..2b4bc5c819 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -45,7 +45,7 @@ void lp_build_alpha_test(LLVMBuilderRef builder, const struct pipe_alpha_state *state, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 9dbcdb4daa..634575670d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -38,14 +38,14 @@ #include struct pipe_alpha_state; -union lp_type; +struct lp_type; struct lp_build_mask_context; void lp_build_alpha_test(LLVMBuilderRef builder, const struct pipe_alpha_state *state, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index ce3e5f91c0..0b115fc9b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -65,7 +65,7 @@ lp_build_min_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -113,7 +113,7 @@ lp_build_max_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -159,7 +159,7 @@ LLVMValueRef lp_build_comp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->one) return bld->zero; @@ -188,7 +188,7 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(a == bld->zero) @@ -241,7 +241,7 @@ lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(b == bld->zero) @@ -405,7 +405,7 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -477,7 +477,7 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -590,7 +590,7 @@ LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); if(!type.sign) @@ -627,7 +627,7 @@ LLVMValueRef lp_build_sgn(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef cond; LLVMValueRef res; @@ -678,7 +678,7 @@ lp_build_round_sse41(struct lp_build_context *bld, LLVMValueRef a, enum lp_build_round_sse41_mode mode) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); const char *intrinsic; @@ -706,7 +706,7 @@ LLVMValueRef lp_build_round(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); @@ -724,7 +724,7 @@ LLVMValueRef lp_build_floor(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); @@ -742,7 +742,7 @@ LLVMValueRef lp_build_ceil(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); @@ -760,7 +760,7 @@ LLVMValueRef lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); @@ -782,7 +782,7 @@ LLVMValueRef lp_build_int(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); assert(type.floating); @@ -805,7 +805,7 @@ LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -823,7 +823,7 @@ LLVMValueRef lp_build_rcp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->undef; @@ -854,7 +854,7 @@ LLVMValueRef lp_build_rsqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); @@ -875,7 +875,7 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -895,7 +895,7 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -966,7 +966,7 @@ lp_build_polynomial(struct lp_build_context *bld, const double *coeffs, unsigned num_coeffs) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res = NULL; unsigned i; @@ -1014,7 +1014,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef ipart = NULL; @@ -1107,7 +1107,7 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef *p_floor_log2, LLVMValueRef *p_log2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 5e083b847f..d68a97c4b8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -40,7 +40,7 @@ #include -union lp_type type; +struct lp_type type; struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index d19e18846c..da272e549f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -46,7 +46,7 @@ struct pipe_blend_state; -union lp_type; +struct lp_type; struct lp_build_context; @@ -74,7 +74,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, @@ -84,7 +84,7 @@ lp_build_blend_aos(LLVMBuilderRef builder, void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef const_[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index c11a9398f8..d14f468ba9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -303,7 +303,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index b92254a7d6..9511299d55 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -199,7 +199,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef con[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c index 21487365ea..c8eaa8c394 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c @@ -42,7 +42,7 @@ unsigned -lp_mantissa(union lp_type type) +lp_mantissa(struct lp_type type) { assert(type.floating); @@ -72,7 +72,7 @@ lp_mantissa(union lp_type type) * Same as lp_const_scale(), but in terms of shifts. */ unsigned -lp_const_shift(union lp_type type) +lp_const_shift(struct lp_type type) { if(type.floating) return 0; @@ -86,7 +86,7 @@ lp_const_shift(union lp_type type) unsigned -lp_const_offset(union lp_type type) +lp_const_offset(struct lp_type type) { if(type.floating || type.fixed) return 0; @@ -104,7 +104,7 @@ lp_const_offset(union lp_type type) * else for the fixed points types and normalized integers. */ double -lp_const_scale(union lp_type type) +lp_const_scale(struct lp_type type) { unsigned long long llscale; double dscale; @@ -122,7 +122,7 @@ lp_const_scale(union lp_type type) * Minimum value representable by the type. */ double -lp_const_min(union lp_type type) +lp_const_min(struct lp_type type) { unsigned bits; @@ -158,7 +158,7 @@ lp_const_min(union lp_type type) * Maximum value representable by the type. */ double -lp_const_max(union lp_type type) +lp_const_max(struct lp_type type) { unsigned bits; @@ -190,7 +190,7 @@ lp_const_max(union lp_type type) double -lp_const_eps(union lp_type type) +lp_const_eps(struct lp_type type) { if (type.floating) { switch(type.width) { @@ -211,7 +211,7 @@ lp_const_eps(union lp_type type) LLVMValueRef -lp_build_undef(union lp_type type) +lp_build_undef(struct lp_type type) { LLVMTypeRef vec_type = lp_build_vec_type(type); return LLVMGetUndef(vec_type); @@ -219,7 +219,7 @@ lp_build_undef(union lp_type type) LLVMValueRef -lp_build_zero(union lp_type type) +lp_build_zero(struct lp_type type) { LLVMTypeRef vec_type = lp_build_vec_type(type); return LLVMConstNull(vec_type); @@ -227,7 +227,7 @@ lp_build_zero(union lp_type type) LLVMValueRef -lp_build_one(union lp_type type) +lp_build_one(struct lp_type type) { LLVMTypeRef elem_type; LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; @@ -269,7 +269,7 @@ lp_build_one(union lp_type type) LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type, double val) { LLVMTypeRef elem_type = lp_build_elem_type(type); @@ -295,7 +295,7 @@ lp_build_const_scalar(union lp_type type, LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type, long long val) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); @@ -312,7 +312,7 @@ lp_build_int_const_scalar(union lp_type type, LLVMValueRef -lp_build_const_aos(union lp_type type, +lp_build_const_aos(struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle) { @@ -352,8 +352,8 @@ lp_build_const_aos(union lp_type type, LLVMValueRef -lp_build_const_mask_aos(union lp_type type, - boolean cond[4]) +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]) { LLVMTypeRef elem_type = LLVMIntType(type.width); LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h index 1934530ea3..ffb302f736 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h @@ -42,67 +42,67 @@ #include -union lp_type type; +struct lp_type type; unsigned -lp_mantissa(union lp_type type); +lp_mantissa(struct lp_type type); unsigned -lp_const_shift(union lp_type type); +lp_const_shift(struct lp_type type); unsigned -lp_const_offset(union lp_type type); +lp_const_offset(struct lp_type type); double -lp_const_scale(union lp_type type); +lp_const_scale(struct lp_type type); double -lp_const_min(union lp_type type); +lp_const_min(struct lp_type type); double -lp_const_max(union lp_type type); +lp_const_max(struct lp_type type); double -lp_const_eps(union lp_type type); +lp_const_eps(struct lp_type type); LLVMValueRef -lp_build_undef(union lp_type type); +lp_build_undef(struct lp_type type); LLVMValueRef -lp_build_zero(union lp_type type); +lp_build_zero(struct lp_type type); LLVMValueRef -lp_build_one(union lp_type type); +lp_build_one(struct lp_type type); LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type, double val); LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type, long long val); LLVMValueRef -lp_build_const_aos(union lp_type type, +lp_build_const_aos(struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle); LLVMValueRef -lp_build_const_mask_aos(union lp_type type, - boolean cond[4]); +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]); #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index c5a71d2c72..186cac70f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -86,7 +86,7 @@ */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - union lp_type src_type, + struct lp_type src_type, unsigned dst_width, LLVMValueRef src) { @@ -152,7 +152,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, - union lp_type dst_type, + struct lp_type dst_type, LLVMValueRef src) { LLVMTypeRef vec_type = lp_build_vec_type(dst_type); @@ -248,8 +248,8 @@ lp_build_const_pack_shuffle(unsigned n) */ static void lp_build_expand(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, LLVMValueRef src, LLVMValueRef *dst, unsigned num_dsts) { @@ -266,7 +266,7 @@ lp_build_expand(LLVMBuilderRef builder, dst[0] = src; while(src_type.width < dst_type.width) { - union lp_type new_type = src_type; + struct lp_type new_type = src_type; LLVMTypeRef new_vec_type; new_type.width *= 2; @@ -314,8 +314,8 @@ lp_build_expand(LLVMBuilderRef builder, */ static LLVMValueRef lp_build_pack2(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, boolean clamped, LLVMValueRef lo, LLVMValueRef hi) @@ -392,8 +392,8 @@ lp_build_pack2(LLVMBuilderRef builder, */ static LLVMValueRef lp_build_pack(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, boolean clamped, const LLVMValueRef *src, unsigned num_srcs) { @@ -410,7 +410,7 @@ lp_build_pack(LLVMBuilderRef builder, tmp[i] = src[i]; while(src_type.width > dst_type.width) { - union lp_type new_type = src_type; + struct lp_type new_type = src_type; new_type.width /= 2; new_type.length *= 2; @@ -442,12 +442,12 @@ lp_build_pack(LLVMBuilderRef builder, */ void lp_build_conv(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { - union lp_type tmp_type; + struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; unsigned i; @@ -470,7 +470,7 @@ lp_build_conv(LLVMBuilderRef builder, * Clamp if necessary */ - if(src_type.value != dst_type.value) { + if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { struct lp_build_context bld; double src_min = lp_const_min(src_type); double dst_min = lp_const_min(dst_type); @@ -656,8 +656,8 @@ lp_build_conv(LLVMBuilderRef builder, */ void lp_build_conv_mask(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h index 05c1ef2a10..ca378804d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h @@ -40,33 +40,33 @@ #include -union lp_type type; +struct lp_type type; LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - union lp_type src_type, + struct lp_type src_type, unsigned dst_width, LLVMValueRef src); LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, - union lp_type dst_type, + struct lp_type dst_type, LLVMValueRef src); void lp_build_conv(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *srcs, unsigned num_srcs, LLVMValueRef *dsts, unsigned num_dsts); void lp_build_conv_mask(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index e5fe81193f..21c665c4d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -71,11 +71,11 @@ /** * Return a type appropriate for depth/stencil testing. */ -union lp_type +struct lp_type lp_depth_type(const struct util_format_description *format_desc, unsigned length) { - union lp_type type; + struct lp_type type; unsigned swizzle; assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); @@ -85,7 +85,7 @@ lp_depth_type(const struct util_format_description *format_desc, swizzle = format_desc->swizzle[0]; assert(swizzle < 4); - type.value = 0; + memset(&type, 0, sizeof type); type.width = format_desc->block.bits; if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { @@ -114,7 +114,7 @@ lp_depth_type(const struct util_format_description *format_desc, void lp_build_depth_test(LLVMBuilderRef builder, const struct pipe_depth_state *state, - union lp_type type, + struct lp_type type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef src, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 5d2e042fcc..79d6981bb5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -41,11 +41,11 @@ struct pipe_depth_state; struct util_format_description; -union lp_type; +struct lp_type; struct lp_build_mask_context; -union lp_type +struct lp_type lp_depth_type(const struct util_format_description *format_desc, unsigned length); @@ -53,7 +53,7 @@ lp_depth_type(const struct util_format_description *format_desc, void lp_build_depth_test(LLVMBuilderRef builder, const struct pipe_depth_state *state, - union lp_type type, + struct lp_type type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef src, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 69ed014ff3..dcc25fbff8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -405,7 +405,7 @@ lp_build_mask_check(struct lp_build_mask_context *mask) void lp_build_mask_begin(struct lp_build_mask_context *mask, struct lp_build_flow_context *flow, - union lp_type type, + struct lp_type type, LLVMValueRef value) { memset(mask, 0, sizeof *mask); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 9d76e3064d..e61999ff06 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -38,7 +38,7 @@ #include -union lp_type; +struct lp_type; struct lp_build_flow_context; @@ -84,7 +84,7 @@ struct lp_build_mask_context void lp_build_mask_begin(struct lp_build_mask_context *mask, struct lp_build_flow_context *flow, - union lp_type type, + struct lp_type type, LLVMValueRef value); /** diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 5ee0656093..6d3f692619 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -39,7 +39,7 @@ #include "pipe/p_format.h" struct util_format_description; -union lp_type; +struct lp_type; /** @@ -103,7 +103,7 @@ lp_build_gather(LLVMBuilderRef builder, void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, - union lp_type type, + struct lp_type type, LLVMValueRef packed, LLVMValueRef *rgba); @@ -111,7 +111,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, void lp_build_load_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, - union lp_type type, + struct lp_type type, LLVMValueRef base_ptr, LLVMValueRef offsets, LLVMValueRef *rgba); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index 569e8d10a3..b5ff434e1a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -84,7 +84,7 @@ lp_build_gather(LLVMBuilderRef builder, static LLVMValueRef -lp_build_format_swizzle(union lp_type type, +lp_build_format_swizzle(struct lp_type type, const LLVMValueRef *inputs, enum util_format_swizzle swizzle) { @@ -110,7 +110,7 @@ lp_build_format_swizzle(union lp_type type, void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, - union lp_type type, + struct lp_type type, LLVMValueRef packed, LLVMValueRef *rgba) { @@ -188,7 +188,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, void lp_build_load_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, - union lp_type type, + struct lp_type type, LLVMValueRef base_ptr, LLVMValueRef offsets, LLVMValueRef *rgba) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index cfe20a0d75..338dbca6d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -292,7 +292,7 @@ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 9194f6233a..9c57a10879 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -83,7 +83,7 @@ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 995a69c0f4..6b6f820769 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -45,7 +45,7 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); @@ -301,7 +301,7 @@ lp_build_select(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - union lp_type type = bld->type; + struct lp_type type = bld->type; LLVMValueRef res; if(a == b) @@ -339,7 +339,7 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef b, const boolean cond[4]) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index 9099e0fb5b..a4ee7723b5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -42,7 +42,7 @@ #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ -union lp_type type; +struct lp_type type; struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 6f565af76d..403d0e4836 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -40,7 +40,7 @@ struct pipe_texture; struct pipe_sampler_state; -union lp_type; +struct lp_type; /** @@ -123,7 +123,7 @@ void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, struct lp_sampler_dynamic_state *dynamic_state, - union lp_type fp_type, + struct lp_type fp_type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 3ca25b0e76..08b1dc10f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -97,15 +97,15 @@ struct lp_build_sample_context const struct util_format_description *format_desc; /** Incoming coordinates type and build context */ - union lp_type coord_type; + struct lp_type coord_type; struct lp_build_context coord_bld; /** Integer coordinates */ - union lp_type int_coord_type; + struct lp_type int_coord_type; struct lp_build_context int_coord_bld; /** Output texels type and build context */ - union lp_type texel_type; + struct lp_type texel_type; struct lp_build_context texel_bld; }; @@ -337,7 +337,7 @@ void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, struct lp_sampler_dynamic_state *dynamic_state, - union lp_type type, + struct lp_type type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c index f35638be44..64e81f7b1f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c @@ -64,7 +64,7 @@ LLVMValueRef lp_build_broadcast_scalar(struct lp_build_context *bld, LLVMValueRef scalar) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; unsigned i; @@ -83,7 +83,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef a, unsigned channel) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -115,7 +115,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, * YY00 YY00 .... YY00 * YYYY YYYY .... YYYY <= output */ - union lp_type type4 = type; + struct lp_type type4 = type; const char shifts[4][2] = { { 1, 2}, {-1, 2}, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index cb0b6707ec..1f6da80448 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -40,7 +40,7 @@ #include -union lp_type type; +struct lp_type type; struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 10c251c416..eddb7a83fa 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -39,7 +39,7 @@ struct tgsi_token; -union lp_type; +struct lp_type; struct lp_build_context; struct lp_build_mask_context; @@ -60,7 +60,7 @@ struct lp_build_sampler_soa void (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, @@ -72,7 +72,7 @@ struct lp_build_sampler_soa void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, const LLVMValueRef *pos, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index b106ce2317..adc81569ed 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -1415,7 +1415,7 @@ emit_instruction( void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, const LLVMValueRef *pos, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 577644b7ab..606243d6c5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -33,7 +33,7 @@ LLVMTypeRef -lp_build_elem_type(union lp_type type) +lp_build_elem_type(struct lp_type type) { if (type.floating) { switch(type.width) { @@ -55,7 +55,7 @@ lp_build_elem_type(union lp_type type) LLVMTypeRef -lp_build_vec_type(union lp_type type) +lp_build_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_elem_type(type); return LLVMVectorType(elem_type, type.length); @@ -69,7 +69,7 @@ lp_build_vec_type(union lp_type type) * type and check for identity. */ boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type) { LLVMTypeKind elem_kind; @@ -107,7 +107,7 @@ lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) { LLVMTypeRef elem_type; @@ -128,7 +128,7 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) boolean -lp_check_value(union lp_type type, LLVMValueRef val) +lp_check_value(struct lp_type type, LLVMValueRef val) { LLVMTypeRef vec_type; @@ -143,25 +143,26 @@ lp_check_value(union lp_type type, LLVMValueRef val) LLVMTypeRef -lp_build_int_elem_type(union lp_type type) +lp_build_int_elem_type(struct lp_type type) { return LLVMIntType(type.width); } LLVMTypeRef -lp_build_int_vec_type(union lp_type type) +lp_build_int_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); return LLVMVectorType(elem_type, type.length); } -union lp_type -lp_int_type(union lp_type type) +struct lp_type +lp_int_type(struct lp_type type) { - union lp_type int_type; - int_type.value = 0; + struct lp_type int_type; + + memset(&int_type, 0, sizeof int_type); int_type.width = type.width; int_type.length = type.length; return int_type; @@ -171,7 +172,7 @@ lp_int_type(union lp_type type) void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, - union lp_type type) + struct lp_type type) { bld->builder = builder; bld->type = type; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 9933e0b45c..ee5ca3483c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -56,58 +56,55 @@ * on the types used for intermediate computations, such as signed vs unsigned, * normalized values, or fixed point. */ -union lp_type { - struct { - /** - * Floating-point. Cannot be used with fixed. Integer numbers are - * represented by this zero. - */ - unsigned floating:1; - - /** - * Fixed-point. Cannot be used with floating. Integer numbers are - * represented by this zero. - */ - unsigned fixed:1; - - /** - * Whether it can represent negative values or not. - * - * If this is not set for floating point, it means that all values are - * assumed to be positive. - */ - unsigned sign:1; - - /** - * Whether values are normalized to fit [0, 1] interval, or [-1, 1] - * interval for signed types. - * - * For integer types it means the representable integer range should be - * interpreted as the interval above. - * - * For floating and fixed point formats it means the values should be - * clamped to the interval above. - */ - unsigned norm:1; - - /** - * Element width. - * - * For fixed point values, the fixed point is assumed to be at half the - * width. - */ - unsigned width:14; - - /** - * Vector length. - * - * width*length should be a power of two greater or equal to eight. - * - * @sa LP_MAX_VECTOR_LENGTH - */ - unsigned length:14; - }; - uint32_t value; +struct lp_type { + /** + * Floating-point. Cannot be used with fixed. Integer numbers are + * represented by this zero. + */ + unsigned floating:1; + + /** + * Fixed-point. Cannot be used with floating. Integer numbers are + * represented by this zero. + */ + unsigned fixed:1; + + /** + * Whether it can represent negative values or not. + * + * If this is not set for floating point, it means that all values are + * assumed to be positive. + */ + unsigned sign:1; + + /** + * Whether values are normalized to fit [0, 1] interval, or [-1, 1] + * interval for signed types. + * + * For integer types it means the representable integer range should be + * interpreted as the interval above. + * + * For floating and fixed point formats it means the values should be + * clamped to the interval above. + */ + unsigned norm:1; + + /** + * Element width. + * + * For fixed point values, the fixed point is assumed to be at half the + * width. + */ + unsigned width:14; + + /** + * Vector length. + * + * width*length should be a power of two greater or equal to eight. + * + * @sa LP_MAX_VECTOR_LENGTH + */ + unsigned length:14; }; @@ -124,7 +121,7 @@ struct lp_build_context * This not only describes the input/output LLVM types, but also whether * to normalize/clamp the results. */ - union lp_type type; + struct lp_type type; /** Same as lp_build_undef(type) */ LLVMValueRef undef; @@ -138,41 +135,41 @@ struct lp_build_context LLVMTypeRef -lp_build_elem_type(union lp_type type); +lp_build_elem_type(struct lp_type type); LLVMTypeRef -lp_build_vec_type(union lp_type type); +lp_build_vec_type(struct lp_type type); boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type); +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type); boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type); +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type); boolean -lp_check_value(union lp_type type, LLVMValueRef val); +lp_check_value(struct lp_type type, LLVMValueRef val); LLVMTypeRef -lp_build_int_elem_type(union lp_type type); +lp_build_int_elem_type(struct lp_type type); LLVMTypeRef -lp_build_int_vec_type(union lp_type type); +lp_build_int_vec_type(struct lp_type type); -union lp_type -lp_int_type(union lp_type type); +struct lp_type +lp_int_type(struct lp_type type); void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, - union lp_type type); + struct lp_type type); #endif /* !LP_BLD_TYPE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 618cf1ffb8..9faed5a0b1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -133,13 +133,13 @@ generate_pos0(LLVMBuilderRef builder, static void generate_depth(LLVMBuilderRef builder, const struct lp_fragment_shader_variant_key *key, - union lp_type src_type, + struct lp_type src_type, struct lp_build_mask_context *mask, LLVMValueRef src, LLVMValueRef dst_ptr) { const struct util_format_description *format_desc; - union lp_type dst_type; + struct lp_type dst_type; if(!key->depth.enabled) return; @@ -181,7 +181,7 @@ generate_fs(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef context_ptr, unsigned i, const struct lp_build_interp_soa_context *interp, @@ -299,7 +299,7 @@ generate_fs(struct llvmpipe_context *lp, static void generate_blend(const struct pipe_blend_state *blend, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, @@ -364,8 +364,8 @@ generate_fragment(struct llvmpipe_context *lp, { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader_variant *variant; - union lp_type fs_type; - union lp_type blend_type; + struct lp_type fs_type; + struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; @@ -431,7 +431,7 @@ generate_fragment(struct llvmpipe_context *lp, /* TODO: actually pick these based on the fs and color buffer * characteristics. */ - fs_type.value = 0; + memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ @@ -439,7 +439,7 @@ generate_fragment(struct llvmpipe_context *lp, fs_type.length = 4; /* 4 element per vector */ num_fs = 4; - blend_type.value = 0; + memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 69aaae26e0..a88e110c66 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -86,43 +86,43 @@ random_float(void); void -dump_type(FILE *fp, union lp_type type); +dump_type(FILE *fp, struct lp_type type); double -read_elem(union lp_type type, const void *src, unsigned index); +read_elem(struct lp_type type, const void *src, unsigned index); void -write_elem(union lp_type type, void *dst, unsigned index, double src); +write_elem(struct lp_type type, void *dst, unsigned index, double src); void -random_elem(union lp_type type, void *dst, unsigned index); +random_elem(struct lp_type type, void *dst, unsigned index); void -read_vec(union lp_type type, const void *src, double *dst); +read_vec(struct lp_type type, const void *src, double *dst); void -write_vec(union lp_type type, void *dst, const double *src); +write_vec(struct lp_type type, void *dst, const double *src); void -random_vec(union lp_type type, void *dst); +random_vec(struct lp_type type, void *dst); boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps); +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps); boolean -compare_vec(union lp_type type, const void *res, const void *ref); +compare_vec(struct lp_type type, const void *res, const void *ref); void -dump_vec(FILE *fp, union lp_type type, const void *src); +dump_vec(FILE *fp, struct lp_type type, const void *src); #endif /* !LP_TEST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 8dfad468e3..1b57ea2a5c 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -80,7 +80,7 @@ static void write_tsv_row(FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type, + struct lp_type type, double cycles, boolean success) { @@ -125,7 +125,7 @@ static void dump_blend_type(FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { fprintf(fp, "%s", mode ? "soa" : "aos"); @@ -153,7 +153,7 @@ static LLVMValueRef add_blend_test(LLVMModuleRef module, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { LLVMTypeRef ret_type; LLVMTypeRef vec_type; @@ -467,7 +467,7 @@ test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; @@ -765,10 +765,10 @@ blend_funcs[] = { }; -const union lp_type blend_types[] = { +const struct lp_type blend_types[] = { /* float, fixed, sign, norm, width, len */ - {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, /* f32 x 4 */ - {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */ + { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ }; @@ -788,7 +788,7 @@ test_all(unsigned verbose, FILE *fp) const unsigned *alpha_dst_factor; struct pipe_blend_state blend; enum vector_mode mode; - const union lp_type *type; + const struct lp_type *type; bool success = TRUE; for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { @@ -841,7 +841,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) const unsigned *alpha_dst_factor; struct pipe_blend_state blend; enum vector_mode mode; - const union lp_type *type; + const struct lp_type *type; unsigned long i; bool success = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index e6489834af..ae2697143f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -59,8 +59,8 @@ write_tsv_header(FILE *fp) static void write_tsv_row(FILE *fp, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, double cycles, boolean success) { @@ -80,8 +80,8 @@ write_tsv_row(FILE *fp, static void dump_conv_types(FILE *fp, - union lp_type src_type, - union lp_type dst_type) + struct lp_type src_type, + struct lp_type dst_type) { fprintf(fp, "src_type="); dump_type(fp, src_type); @@ -96,8 +96,8 @@ dump_conv_types(FILE *fp, static LLVMValueRef add_conv_test(LLVMModuleRef module, - union lp_type src_type, unsigned num_srcs, - union lp_type dst_type, unsigned num_dsts) + struct lp_type src_type, unsigned num_srcs, + struct lp_type dst_type, unsigned num_dsts) { LLVMTypeRef args[2]; LLVMValueRef func; @@ -145,8 +145,8 @@ add_conv_test(LLVMModuleRef module, static boolean test_one(unsigned verbose, FILE *fp, - union lp_type src_type, - union lp_type dst_type) + struct lp_type src_type, + struct lp_type dst_type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; @@ -343,35 +343,35 @@ test_one(unsigned verbose, } -const union lp_type conv_types[] = { +const struct lp_type conv_types[] = { /* float, fixed, sign, norm, width, len */ - {{ TRUE, FALSE, TRUE, TRUE, 32, 4 }}, - {{ TRUE, FALSE, TRUE, FALSE, 32, 4 }}, - {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, - {{ TRUE, FALSE, FALSE, FALSE, 32, 4 }}, + { TRUE, FALSE, TRUE, TRUE, 32, 4 }, + { TRUE, FALSE, TRUE, FALSE, 32, 4 }, + { TRUE, FALSE, FALSE, TRUE, 32, 4 }, + { TRUE, FALSE, FALSE, FALSE, 32, 4 }, /* TODO: test fixed formats too */ - {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 32, 4 }}, - {{ FALSE, FALSE, TRUE, FALSE, 32, 4 }}, - {{ FALSE, FALSE, FALSE, TRUE, 32, 4 }}, - {{ FALSE, FALSE, FALSE, FALSE, 32, 4 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 8, 16 }}, - {{ FALSE, FALSE, TRUE, FALSE, 8, 16 }}, - {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, - {{ FALSE, FALSE, FALSE, FALSE, 8, 16 }}, + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, + { FALSE, FALSE, TRUE, FALSE, 16, 8 }, + { FALSE, FALSE, FALSE, TRUE, 16, 8 }, + { FALSE, FALSE, FALSE, FALSE, 16, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 32, 4 }, + { FALSE, FALSE, TRUE, FALSE, 32, 4 }, + { FALSE, FALSE, FALSE, TRUE, 32, 4 }, + { FALSE, FALSE, FALSE, FALSE, 32, 4 }, + + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, + { FALSE, FALSE, TRUE, FALSE, 16, 8 }, + { FALSE, FALSE, FALSE, TRUE, 16, 8 }, + { FALSE, FALSE, FALSE, FALSE, 16, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 8, 16 }, + { FALSE, FALSE, TRUE, FALSE, 8, 16 }, + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, + { FALSE, FALSE, FALSE, FALSE, 8, 16 }, }; @@ -381,8 +381,8 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]); boolean test_all(unsigned verbose, FILE *fp) { - const union lp_type *src_type; - const union lp_type *dst_type; + const struct lp_type *src_type; + const struct lp_type *dst_type; bool success = TRUE; for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) { @@ -407,8 +407,8 @@ test_all(unsigned verbose, FILE *fp) boolean test_some(unsigned verbose, FILE *fp, unsigned long n) { - const union lp_type *src_type; - const union lp_type *dst_type; + const struct lp_type *src_type; + const struct lp_type *dst_type; unsigned long i; bool success = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 49213fb4f0..c3bb8fadf7 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -40,7 +40,7 @@ void dump_type(FILE *fp, - union lp_type type) + struct lp_type type) { fprintf(fp, "%s%s%u%sx%u", type.sign ? (type.floating || type.fixed ? "" : "s") : "u", @@ -52,7 +52,7 @@ dump_type(FILE *fp, double -read_elem(union lp_type type, const void *src, unsigned index) +read_elem(struct lp_type type, const void *src, unsigned index) { double scale = lp_const_scale(type); double value; @@ -115,7 +115,7 @@ read_elem(union lp_type type, const void *src, unsigned index) void -write_elem(union lp_type type, void *dst, unsigned index, double value) +write_elem(struct lp_type type, void *dst, unsigned index, double value) { assert(index < type.length); if(!type.sign && value < 0.0) @@ -184,7 +184,7 @@ write_elem(union lp_type type, void *dst, unsigned index, double value) void -random_elem(union lp_type type, void *dst, unsigned index) +random_elem(struct lp_type type, void *dst, unsigned index) { double value; assert(index < type.length); @@ -209,7 +209,7 @@ random_elem(union lp_type type, void *dst, unsigned index) void -read_vec(union lp_type type, const void *src, double *dst) +read_vec(struct lp_type type, const void *src, double *dst) { unsigned i; for (i = 0; i < type.length; ++i) @@ -218,7 +218,7 @@ read_vec(union lp_type type, const void *src, double *dst) void -write_vec(union lp_type type, void *dst, const double *src) +write_vec(struct lp_type type, void *dst, const double *src) { unsigned i; for (i = 0; i < type.length; ++i) @@ -234,7 +234,7 @@ random_float(void) void -random_vec(union lp_type type, void *dst) +random_vec(struct lp_type type, void *dst) { unsigned i; for (i = 0; i < type.length; ++i) @@ -243,7 +243,7 @@ random_vec(union lp_type type, void *dst) boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps) +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps) { unsigned i; for (i = 0; i < type.length; ++i) { @@ -259,7 +259,7 @@ compare_vec_with_eps(union lp_type type, const void *res, const void *ref, doubl boolean -compare_vec(union lp_type type, const void *res, const void *ref) +compare_vec(struct lp_type type, const void *res, const void *ref) { double eps = lp_const_eps(type); return compare_vec_with_eps(type, res, ref, eps); @@ -267,7 +267,7 @@ compare_vec(union lp_type type, const void *res, const void *ref) void -dump_vec(FILE *fp, union lp_type type, const void *src) +dump_vec(FILE *fp, struct lp_type type, const void *src) { unsigned i; for (i = 0; i < type.length; ++i) { diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 9a876f404d..a1365a045f 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1654,7 +1654,7 @@ lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) static void lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index 7d31705d01..d2a6ae21f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -149,7 +149,7 @@ lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) static void lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, -- cgit v1.2.3 From 459ea0095c31eff835b25dd3eef48a4c073d05f9 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 16 Sep 2009 10:39:06 +0100 Subject: llvmpipe: Make the code portable for MinGW. --- scons/llvm.py | 6 +++++- src/gallium/drivers/llvmpipe/SConscript | 2 +- src/gallium/drivers/llvmpipe/lp_jit.c | 2 +- src/gallium/drivers/llvmpipe/lp_test_blend.c | 16 ++++++++-------- src/gallium/drivers/llvmpipe/lp_test_conv.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_test_main.c | 8 ++++---- 6 files changed, 21 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/scons/llvm.py b/scons/llvm.py index 46a8d829ca..d3293bb404 100644 --- a/scons/llvm.py +++ b/scons/llvm.py @@ -51,7 +51,9 @@ def generate(env): llvm_bin_dir = os.path.join(llvm_dir, llvm_subdir, 'bin') if not os.path.isdir(llvm_bin_dir): - raise SCons.Errors.InternalError, "LLVM build directory not found" + llvm_bin_dir = os.path.join(llvm_dir, 'bin') + if not os.path.isdir(llvm_bin_dir): + raise SCons.Errors.InternalError, "LLVM binary directory not found" env.PrependENVPath('PATH', llvm_bin_dir) @@ -65,6 +67,8 @@ def generate(env): except OSError: print 'llvm-config version %s failed' % version else: + if env['platform'] == 'windows': + env.Append(LIBS = ['imagehlp', 'psapi']) env['LINK'] = env['CXX'] env['LLVM_VERSION'] = version diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index dea4b703c4..f4a9a3b22e 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -3,7 +3,7 @@ Import('*') env = env.Clone() env.Tool('llvm') -if env.has_key('LLVM_VERSION') is False: +if not env.has_key('LLVM_VERSION'): print 'warning: LLVM not found: not building llvmpipe' Return() diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 9465f763d5..b4a22ff4a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -152,7 +152,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module); if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { - fprintf(stderr, "%s\n", error); + _debug_printf("%s\n", error); LLVMDisposeMessage(error); abort(); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 1b57ea2a5c..94b661dcba 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -846,22 +846,22 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) bool success = TRUE; for(i = 0; i < n; ++i) { - rgb_func = &blend_funcs[random() % num_funcs]; - alpha_func = &blend_funcs[random() % num_funcs]; - rgb_src_factor = &blend_factors[random() % num_factors]; - alpha_src_factor = &blend_factors[random() % num_factors]; + rgb_func = &blend_funcs[rand() % num_funcs]; + alpha_func = &blend_funcs[rand() % num_funcs]; + rgb_src_factor = &blend_factors[rand() % num_factors]; + alpha_src_factor = &blend_factors[rand() % num_factors]; do { - rgb_dst_factor = &blend_factors[random() % num_factors]; + rgb_dst_factor = &blend_factors[rand() % num_factors]; } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); do { - alpha_dst_factor = &blend_factors[random() % num_factors]; + alpha_dst_factor = &blend_factors[rand() % num_factors]; } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); - mode = random() & 1; + mode = rand() & 1; - type = &blend_types[random() % num_types]; + type = &blend_types[rand() % num_types]; memset(&blend, 0, sizeof blend); blend.blend_enable = 1; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index ae2697143f..9dcf58e5dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -413,10 +413,10 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) bool success = TRUE; for(i = 0; i < n; ++i) { - src_type = &conv_types[random() % num_types]; + src_type = &conv_types[rand() % num_types]; do { - dst_type = &conv_types[random() % num_types]; + dst_type = &conv_types[rand() % num_types]; } while (src_type == dst_type || src_type->norm != dst_type->norm); if(!test_one(verbose, fp, *src_type, *dst_type)) diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index c3bb8fadf7..4592dc0b2d 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -188,7 +188,7 @@ random_elem(struct lp_type type, void *dst, unsigned index) { double value; assert(index < type.length); - value = (double)random()/(double)RAND_MAX; + value = (double)rand()/(double)RAND_MAX; if(!type.norm) { unsigned long long mask; if (type.floating) @@ -199,10 +199,10 @@ random_elem(struct lp_type type, void *dst, unsigned index) mask = ((unsigned long long)1 << (type.width - 1)) - 1; else mask = ((unsigned long long)1 << type.width) - 1; - value += (double)(mask & random()); + value += (double)(mask & rand()); } if(!type.sign) - if(random() & 1) + if(rand() & 1) value = -value; write_elem(type, dst, index, value); } @@ -229,7 +229,7 @@ write_vec(struct lp_type type, void *dst, const double *src) float random_float(void) { - return (float)((double)random()/(double)RAND_MAX); + return (float)((double)rand()/(double)RAND_MAX); } -- cgit v1.2.3 From c29905aa318cf9ed782935552fa983b048646984 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 16 Sep 2009 14:31:14 +0100 Subject: gallium: Deprecate PIPE_CAP_S3TC. No longer used. S3TC support is queried via pipe_screen::is_format_supported. --- src/gallium/drivers/cell/ppu/cell_screen.c | 2 -- src/gallium/drivers/i915simple/i915_screen.c | 2 -- src/gallium/drivers/i965simple/brw_screen.c | 2 -- src/gallium/drivers/llvmpipe/lp_screen.c | 2 -- src/gallium/drivers/nv04/nv04_screen.c | 2 -- src/gallium/drivers/nv10/nv10_screen.c | 2 -- src/gallium/drivers/nv20/nv20_screen.c | 2 -- src/gallium/drivers/nv30/nv30_screen.c | 2 -- src/gallium/drivers/nv40/nv40_screen.c | 2 -- src/gallium/drivers/nv50/nv50_screen.c | 2 -- src/gallium/drivers/r300/r300_screen.c | 2 -- src/gallium/drivers/softpipe/sp_screen.c | 2 -- src/gallium/include/pipe/p_defines.h | 2 +- 13 files changed, 1 insertion(+), 25 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index bd48ce7005..9161747fdb 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -64,8 +64,6 @@ cell_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 9f017a14cc..a1dd43c1bc 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -101,8 +101,6 @@ i915_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index b22e105f10..fb68fd624b 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -85,8 +85,6 @@ brw_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 0ce1a37bd4..ff7ef8658a 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -65,8 +65,6 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index ff2febb668..170ce3eb7e 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -16,8 +16,6 @@ nv04_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 4469b22d91..ee5901e743 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -15,8 +15,6 @@ nv10_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index e6924ad71e..4eeacd1afd 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -15,8 +15,6 @@ nv20_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index f8285e4455..41af38450b 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -22,8 +22,6 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 5d2a4216c5..bd13dfddd1 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -21,8 +21,6 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 7adaaaa135..3b08e1b89f 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -87,8 +87,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 593178c50b..3b5b1bbd37 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -93,8 +93,6 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) } else { return 0; } - case PIPE_CAP_S3TC: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index da97e7ac04..ce77018415 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -65,8 +65,6 @@ softpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index b01ab6d137..f252d6df00 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -281,7 +281,7 @@ enum pipe_transfer_usage { #define PIPE_CAP_NPOT_TEXTURES 2 #define PIPE_CAP_TWO_SIDED_STENCIL 3 #define PIPE_CAP_GLSL 4 /* XXX need something better */ -#define PIPE_CAP_S3TC 5 +#define PIPE_CAP_S3TC 5 /* XXX: deprecated; cap determined via supported sampler formats */ #define PIPE_CAP_ANISOTROPIC_FILTER 6 #define PIPE_CAP_POINT_SPRITE 7 #define PIPE_CAP_MAX_RENDER_TARGETS 8 -- cgit v1.2.3 From f911d196cf7bdf2d922e11de8ab35649eb6a748c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 16 Sep 2009 16:12:22 +0100 Subject: llvmpipe: Don't assert due to unsupported texture wrap modes. Issue a warning and fallback to clamping. --- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 08b1dc10f3..8ca1be6f1b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -208,6 +208,11 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: /* FIXME */ + _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode); + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + default: assert(0); } -- cgit v1.2.3 From 18d0f9a7a38674367eca25e87f67ddf423d8c4f7 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 17 Sep 2009 16:05:08 +0100 Subject: llvmpipe: Respect input interpolators for the shader. Cherry-picked from fb2c7b6743ba6e89f24843890fb7fcd6a09c3dbb --- src/gallium/drivers/llvmpipe/lp_state_derived.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index e87976b9f3..30fb41ea65 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -93,6 +93,23 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) vinfo->num_attribs = 0; for (i = 0; i < lpfs->info.num_inputs; i++) { int src; + enum interp_mode interp; + + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + interp = INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + interp = INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = INTERP_PERSPECTIVE; + break; + default: + assert(0); + interp = INTERP_LINEAR; + } + switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: src = draw_find_vs_output(llvmpipe->draw, @@ -108,7 +125,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) case TGSI_SEMANTIC_FOG: src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; case TGSI_SEMANTIC_GENERIC: @@ -116,7 +133,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* this includes texcoords and varying vars */ src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC, lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; default: -- cgit v1.2.3 From 76c2e34b22836c3a71a096be5620ded97a2ae636 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 20 Sep 2009 12:28:07 +0100 Subject: llvmpipe: Update tile status on flush. --- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 143afec3d3..ddda5650a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -225,11 +225,14 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) tc->clear_val); screen->transfer_unmap(screen, pt); + + tile->status = LP_TILE_STATUS_UNDEFINED; break; } case LP_TILE_STATUS_DEFINED: lp_put_tile_rgba_soa(pt, x, y, tile->color); + tile->status = LP_TILE_STATUS_UNDEFINED; break; } } -- cgit v1.2.3 From 911a7a82cd44e89dd7c24a256a0a172f01eadde3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 20 Sep 2009 18:04:00 +0100 Subject: llvmpipe: Fix lp_get_cached_tile. Align coordinates to tile boundaries. --- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index ddda5650a9..2e576e6039 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -260,7 +260,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, case LP_TILE_STATUS_UNDEFINED: /* get new tile data from transfer */ - lp_get_tile_rgba_soa(pt, x, y, tile->color); + lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color); tile->status = LP_TILE_STATUS_DEFINED; break; -- cgit v1.2.3