From c208a2c791fa24c7c5887fc496738cbddbfafc72 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 28 Jul 2008 12:42:13 +0900 Subject: Merge tgsi/exec and tgsi/util directories. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2275 ++++++++++++++++++++++++++++++++ 1 file changed, 2275 insertions(+) create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sse2.c (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c new file mode 100644 index 0000000000..0cb1f11ef2 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -0,0 +1,2275 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_exec.h" +#include "tgsi_sse2.h" + +#include "rtasm/rtasm_x86sse.h" + +#ifdef PIPE_ARCH_X86 + +/* for 1/sqrt() + * + * This costs about 100fps (close to 10%) in gears: + */ +#define HIGH_PRECISION 1 + + +#define FOR_EACH_CHANNEL( CHAN )\ + for( CHAN = 0; CHAN < 4; CHAN++ ) + +#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + if( IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ + FOR_EACH_CHANNEL( CHAN )\ + IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +/** + * X86 utility functions. + */ + +static struct x86_reg +make_xmm( + unsigned xmm ) +{ + return x86_make_reg( + file_XMM, + (enum x86_reg_name) xmm ); +} + +/** + * X86 register mapping helpers. + */ + +static struct x86_reg +get_const_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_CX ); +} + +static struct x86_reg +get_input_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_AX ); +} + +static struct x86_reg +get_output_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DX ); +} + +static struct x86_reg +get_temp_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_BX ); +} + +static struct x86_reg +get_coef_base( void ) +{ + return get_output_base(); +} + +static struct x86_reg +get_immediate_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + + +/** + * Data access helpers. + */ + + +static struct x86_reg +get_immediate( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_immediate_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_const( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_const_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_input( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_input_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_output( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_output_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_temp( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_temp_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_coef( + unsigned vec, + unsigned chan, + unsigned member ) +{ + return x86_make_disp( + get_coef_base(), + ((vec * 3 + member) * 4 + chan) * 4 ); +} + + +static void +emit_ret( + struct x86_function *func ) +{ + x86_ret( func ); +} + + +/** + * Data fetch helpers. + */ + +/** + * Copy a shader constant to xmm register + * \param xmm the destination xmm register + * \param vec the src const buffer index + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_const( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_const( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +static void +emit_immediate( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_immediate( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + + +/** + * Copy a shader input to xmm register + * \param xmm the destination xmm register + * \param vec the src input attrib + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_inputf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + make_xmm( xmm ), + get_input( vec, chan ) ); +} + +/** + * Store an xmm register to a shader output + * \param xmm the source xmm register + * \param vec the dest output attrib + * \param chan src dest channel to store (X, Y, Z or W) + */ +static void +emit_output( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_output( vec, chan ), + make_xmm( xmm ) ); +} + +/** + * Copy a shader temporary to xmm register + * \param xmm the destination xmm register + * \param vec the src temp register + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_tempf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + make_xmm( xmm ), + get_temp( vec, chan ) ); +} + +/** + * Load an xmm register with an input attrib coefficient (a0, dadx or dady) + * \param xmm the destination xmm register + * \param vec the src input/attribute coefficient index + * \param chan src channel to fetch (X, Y, Z or W) + * \param member 0=a0, 1=dadx, 2=dady + */ +static void +emit_coef( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan, + unsigned member ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_coef( vec, chan, member ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +/** + * Data store helpers. + */ + +static void +emit_inputs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_input( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_temps( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + get_temp( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_addrs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_temps( + func, + xmm, + vec + TGSI_EXEC_NUM_TEMPS, + chan ); +} + +/** + * Coefficent fetch helpers. + */ + +static void +emit_coef_a0( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 0 ); +} + +static void +emit_coef_dadx( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 1 ); +} + +static void +emit_coef_dady( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 2 ); +} + +/** + * Function call helpers. + */ + +static void +emit_push_gp( + struct x86_function *func ) +{ + x86_push( + func, + x86_make_reg( file_REG32, reg_AX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX) ); +} + +static void +x86_pop_gp( + struct x86_function *func ) +{ + /* Restore GP registers in a reverse order. + */ + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX) ); +} + +static void +emit_func_call_dst( + struct x86_function *func, + unsigned xmm_dst, + void (PIPE_CDECL *code)() ) +{ + sse_movaps( + func, + get_temp( TEMP_R0, 0 ), + make_xmm( xmm_dst ) ); + + emit_push_gp( + func ); + + { + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); + x86_pop(func, ecx ); + } + + + x86_pop_gp( + func ); + + sse_movaps( + func, + make_xmm( xmm_dst ), + get_temp( TEMP_R0, 0 ) ); +} + +static void +emit_func_call_dst_src( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src, + void (PIPE_CDECL *code)() ) +{ + sse_movaps( + func, + get_temp( TEMP_R0, 1 ), + make_xmm( xmm_src ) ); + + emit_func_call_dst( + func, + xmm_dst, + code ); +} + +/** + * Low-level instruction translators. + */ + +static void +emit_abs( + struct x86_function *func, + unsigned xmm ) +{ + sse_andps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_7FFFFFFF_I, + TGSI_EXEC_TEMP_7FFFFFFF_C ) ); +} + +static void +emit_add( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_addps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void PIPE_CDECL +cos4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = cosf( store[X + 0] ); + store[X + 1] = cosf( store[X + 1] ); + store[X + 2] = cosf( store[X + 2] ); + store[X + 3] = cosf( store[X + 3] ); +} + +static void +emit_cos( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + cos4f ); +} + +static void PIPE_CDECL +ex24f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = powf( 2.0f, store[X + 0] ); + store[X + 1] = powf( 2.0f, store[X + 1] ); + store[X + 2] = powf( 2.0f, store[X + 2] ); + store[X + 3] = powf( 2.0f, store[X + 3] ); +} + +static void +emit_ex2( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + ex24f ); +} + +static void +emit_f2it( + struct x86_function *func, + unsigned xmm ) +{ + sse2_cvttps2dq( + func, + make_xmm( xmm ), + make_xmm( xmm ) ); +} + +static void PIPE_CDECL +flr4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = floorf( store[X + 0] ); + store[X + 1] = floorf( store[X + 1] ); + store[X + 2] = floorf( store[X + 2] ); + store[X + 3] = floorf( store[X + 3] ); +} + +static void +emit_flr( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + flr4f ); +} + +static void PIPE_CDECL +frc4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] -= floorf( store[X + 0] ); + store[X + 1] -= floorf( store[X + 1] ); + store[X + 2] -= floorf( store[X + 2] ); + store[X + 3] -= floorf( store[X + 3] ); +} + +static void +emit_frc( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + frc4f ); +} + +static void PIPE_CDECL +lg24f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = LOG2( store[X + 0] ); + store[X + 1] = LOG2( store[X + 1] ); + store[X + 2] = LOG2( store[X + 2] ); + store[X + 3] = LOG2( store[X + 3] ); +} + +static void +emit_lg2( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + lg24f ); +} + +static void +emit_MOV( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_movups( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_mul (struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src) +{ + sse_mulps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_neg( + struct x86_function *func, + unsigned xmm ) +{ + sse_xorps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +pow4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = powf( store[X + 0], store[X + 4] ); + store[X + 1] = powf( store[X + 1], store[X + 5] ); + store[X + 2] = powf( store[X + 2], store[X + 6] ); + store[X + 3] = powf( store[X + 3], store[X + 7] ); +} + +static void +emit_pow( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_func_call_dst_src( + func, + xmm_dst, + xmm_src, + pow4f ); +} + +static void +emit_rcp ( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. Need to either emit a proper divide or use the + * iterative technique described below in emit_rsqrt(). + */ + sse2_rcpps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_rsqrt( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ +#if HIGH_PRECISION + /* Although rsqrtps() and rcpps() are low precision on some/all SSE + * implementations, it is possible to improve its precision at + * fairly low cost, using a newton/raphson step, as below: + * + * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) + * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * + * See: http://softwarecommunity.intel.com/articles/eng/1818.htm + */ + { + struct x86_reg dst = make_xmm( xmm_dst ); + struct x86_reg src = make_xmm( xmm_src ); + struct x86_reg tmp0 = make_xmm( 2 ); + struct x86_reg tmp1 = make_xmm( 3 ); + + assert( xmm_dst != xmm_src ); + assert( xmm_dst != 2 && xmm_dst != 3 ); + assert( xmm_src != 2 && xmm_src != 3 ); + + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + sse_rsqrtps( func, tmp1, src ); + sse_mulps( func, src, tmp1 ); + sse_mulps( func, dst, tmp1 ); + sse_mulps( func, src, tmp1 ); + sse_subps( func, tmp0, src ); + sse_mulps( func, dst, tmp0 ); + } +#else + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. + */ + sse_rsqrtps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +#endif +} + +static void +emit_setsign( + struct x86_function *func, + unsigned xmm ) +{ + sse_orps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +sin4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = sinf( store[X + 0] ); + store[X + 1] = sinf( store[X + 1] ); + store[X + 2] = sinf( store[X + 2] ); + store[X + 3] = sinf( store[X + 3] ); +} + +static void +emit_sin (struct x86_function *func, + unsigned xmm_dst) +{ + emit_func_call_dst( + func, + xmm_dst, + sin4f ); +} + +static void +emit_sub( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_subps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +/** + * Register fetch. + */ + +static void +emit_fetch( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_src_register *reg, + const unsigned chan_index ) +{ + unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( reg->SrcRegister.File ) { + case TGSI_FILE_CONSTANT: + emit_const( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_IMMEDIATE: + emit_immediate( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_INPUT: + emit_inputf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_TEMPORARY: + emit_tempf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + break; + + case TGSI_EXTSWIZZLE_ONE: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + break; + + default: + assert( 0 ); + } + + switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { + case TGSI_UTIL_SIGN_CLEAR: + emit_abs( func, xmm ); + break; + + case TGSI_UTIL_SIGN_SET: + emit_setsign( func, xmm ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + emit_neg( func, xmm ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } +} + +#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ + emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + +/** + * Register store. + */ + +static void +emit_store( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index ) +{ + switch( reg->DstRegister.File ) { + case TGSI_FILE_OUTPUT: + emit_output( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_TEMPORARY: + emit_temps( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + default: + assert( 0 ); + } + + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +} + +#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ + emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + +/** + * High-level instruction translators. + */ + +static void +emit_kil( + struct x86_function *func, + const struct tgsi_full_src_register *reg ) +{ + unsigned uniquemask; + unsigned registers[4]; + unsigned nextregister = 0; + unsigned firstchan = ~0; + unsigned chan_index; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + FOR_EACH_CHANNEL( chan_index ) { + unsigned swizzle; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle( + reg, + chan_index ); + + /* check if the component has not been already tested */ + if( !(uniquemask & (1 << swizzle)) ) { + uniquemask |= 1 << swizzle; + + /* allocate register */ + registers[chan_index] = nextregister; + emit_fetch( + func, + nextregister, + reg, + chan_index ); + nextregister++; + + /* mark the first channel used */ + if( firstchan == ~0 ) { + firstchan = chan_index; + } + } + } + + x86_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX ) ); + + FOR_EACH_CHANNEL( chan_index ) { + if( uniquemask & (1 << chan_index) ) { + sse_cmpps( + func, + make_xmm( registers[chan_index] ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + + if( chan_index == firstchan ) { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_AX ), + make_xmm( registers[chan_index] ) ); + } + else { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_DX ), + make_xmm( registers[chan_index] ) ); + x86_or( + func, + x86_make_reg( file_REG32, reg_AX ), + x86_make_reg( file_REG32, reg_DX ) ); + } + } + } + + x86_or( + func, + get_temp( + TGSI_EXEC_TEMP_KILMASK_I, + TGSI_EXEC_TEMP_KILMASK_C ), + x86_make_reg( file_REG32, reg_AX ) ); + + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} + +static void +emit_setcc( + struct x86_function *func, + struct tgsi_full_instruction *inst, + enum sse_cc cc ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + make_xmm( 1 ), + cc ); + sse_andps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static void +emit_cmp( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + sse_andps( + func, + make_xmm( 1 ), + make_xmm( 0 ) ); + sse_andnps( + func, + make_xmm( 0 ), + make_xmm( 2 ) ); + sse_orps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static int +emit_instruction( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + switch( inst->Instruction.Opcode ) { + case TGSI_OPCODE_ARL: +#if 0 + /* XXX this isn't working properly (see glean vertProg1 test) */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_f2it( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } +#else + return 0; +#endif + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C); + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + STORE( func, *inst, 0, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( func, *inst, 0, 0, CHAN_W ); + } + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_maxps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + /* XMM[1] = SrcReg[0].yyyy */ + FETCH( func, *inst, 1, 0, CHAN_Y ); + /* XMM[1] = max(XMM[1], 0) */ + sse_maxps( + func, + make_xmm( 1 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + /* XMM[2] = SrcReg[0].wwww */ + FETCH( func, *inst, 2, 0, CHAN_W ); + /* XMM[2] = min(XMM[2], 128.0) */ + sse_minps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_128_I, + TGSI_EXEC_TEMP_128_C ) ); + /* XMM[2] = max(XMM[2], -128.0) */ + sse_maxps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_MINUS_128_I, + TGSI_EXEC_TEMP_MINUS_128_C ) ); + emit_pow( func, 1, 2 ); + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_xorps( + func, + make_xmm( 2 ), + make_xmm( 2 ) ); + sse_cmpps( + func, + make_xmm( 2 ), + make_xmm( 0 ), + cc_LessThanEqual ); + sse_andps( + func, + make_xmm( 2 ), + make_xmm( 1 ) ); + STORE( func, *inst, 2, 0, CHAN_Z ); + } + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rcp( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rsqrt( func, 1, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 1, 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + return 0; + break; + + case TGSI_OPCODE_LOG: + return 0; + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_add( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul(func, 1, 2 ); + emit_add(func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_W ); + FETCH( func, *inst, 2, 1, CHAN_W ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 1, 1, CHAN_Y ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + FETCH( func, *inst, 0, 0, CHAN_Z ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, 1, CHAN_W ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_minps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_maxps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + emit_setcc( func, inst, cc_LessThan ); + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + emit_setcc( func, inst, cc_NotLessThan ); + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_sub( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_sub( func, 1, 2 ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CND: + return 0; + break; + + case TGSI_OPCODE_CND0: + return 0; + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + return 0; + break; + + case TGSI_OPCODE_INDEX: + return 0; + break; + + case TGSI_OPCODE_NEGATE: + return 0; + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_frc( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + return 0; + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_flr( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + return 0; + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_ex2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_lg2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_pow( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 1, 1, CHAN_Z ); + FETCH( func, *inst, 3, 0, CHAN_Z ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 4, 1, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_MOV( func, 2, 0 ); + emit_mul( func, 2, 1 ); + emit_MOV( func, 5, 3 ); + emit_mul( func, 5, 4 ); + emit_sub( func, 2, 5 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 2, 1, CHAN_X ); + FETCH( func, *inst, 5, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + emit_mul( func, 3, 2 ); + emit_mul( func, 1, 5 ); + emit_sub( func, 3, 1 ); + STORE( func, *inst, 3, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_mul( func, 5, 4 ); + emit_mul( func, 0, 2 ); + emit_sub( func, 5, 0 ); + STORE( func, *inst, 5, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + return 0; + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_abs( func, 0) ; + + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RCC: + return 0; + break; + + case TGSI_OPCODE_DPH: + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 1, CHAN_W ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + return 0; + break; + + case TGSI_OPCODE_DDY: + return 0; + break; + + case TGSI_OPCODE_KIL: + emit_kil( func, &inst->FullSrcRegisters[0] ); + break; + + case TGSI_OPCODE_PK2H: + return 0; + break; + + case TGSI_OPCODE_PK2US: + return 0; + break; + + case TGSI_OPCODE_PK4B: + return 0; + break; + + case TGSI_OPCODE_PK4UB: + return 0; + break; + + case TGSI_OPCODE_RFL: + return 0; + break; + + case TGSI_OPCODE_SEQ: + return 0; + break; + + case TGSI_OPCODE_SFL: + return 0; + break; + + case TGSI_OPCODE_SGT: + return 0; + break; + + case TGSI_OPCODE_SIN: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + return 0; + break; + + case TGSI_OPCODE_SNE: + return 0; + break; + + case TGSI_OPCODE_STR: + return 0; + break; + + case TGSI_OPCODE_TEX: + if (0) { + /* Disable dummy texture code: + */ + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + } + else { + return 0; + } + break; + + case TGSI_OPCODE_TXD: + return 0; + break; + + case TGSI_OPCODE_UP2H: + return 0; + break; + + case TGSI_OPCODE_UP2US: + return 0; + break; + + case TGSI_OPCODE_UP4B: + return 0; + break; + + case TGSI_OPCODE_UP4UB: + return 0; + break; + + case TGSI_OPCODE_X2D: + return 0; + break; + + case TGSI_OPCODE_ARA: + return 0; + break; + + case TGSI_OPCODE_ARR: + return 0; + break; + + case TGSI_OPCODE_BRA: + return 0; + break; + + case TGSI_OPCODE_CAL: + return 0; + break; + + case TGSI_OPCODE_RET: + emit_ret( func ); + break; + + case TGSI_OPCODE_END: + break; + + case TGSI_OPCODE_SSG: + return 0; + break; + + case TGSI_OPCODE_CMP: + emit_cmp (func, inst); + break; + + case TGSI_OPCODE_SCS: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_TXB: + return 0; + break; + + case TGSI_OPCODE_NRM: + return 0; + break; + + case TGSI_OPCODE_DIV: + return 0; + break; + + case TGSI_OPCODE_DP2: + return 0; + break; + + case TGSI_OPCODE_TXL: + return 0; + break; + + case TGSI_OPCODE_BRK: + return 0; + break; + + case TGSI_OPCODE_IF: + return 0; + break; + + case TGSI_OPCODE_LOOP: + return 0; + break; + + case TGSI_OPCODE_REP: + return 0; + break; + + case TGSI_OPCODE_ELSE: + return 0; + break; + + case TGSI_OPCODE_ENDIF: + return 0; + break; + + case TGSI_OPCODE_ENDLOOP: + return 0; + break; + + case TGSI_OPCODE_ENDREP: + return 0; + break; + + case TGSI_OPCODE_PUSHA: + return 0; + break; + + case TGSI_OPCODE_POPA: + return 0; + break; + + case TGSI_OPCODE_CEIL: + return 0; + break; + + case TGSI_OPCODE_I2F: + return 0; + break; + + case TGSI_OPCODE_NOT: + return 0; + break; + + case TGSI_OPCODE_TRUNC: + return 0; + break; + + case TGSI_OPCODE_SHL: + return 0; + break; + + case TGSI_OPCODE_SHR: + return 0; + break; + + case TGSI_OPCODE_AND: + return 0; + break; + + case TGSI_OPCODE_OR: + return 0; + break; + + case TGSI_OPCODE_MOD: + return 0; + break; + + case TGSI_OPCODE_XOR: + return 0; + break; + + case TGSI_OPCODE_SAD: + return 0; + break; + + case TGSI_OPCODE_TXF: + return 0; + break; + + case TGSI_OPCODE_TXQ: + return 0; + break; + + case TGSI_OPCODE_CONT: + return 0; + break; + + case TGSI_OPCODE_EMIT: + return 0; + break; + + case TGSI_OPCODE_ENDPRIM: + return 0; + break; + + default: + return 0; + } + + return 1; +} + +static void +emit_declaration( + struct x86_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned i, j; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } + } +} + +static void aos_to_soa( struct x86_function *func, + uint arg_aos, + uint arg_soa, + uint arg_num, + uint arg_stride ) +{ + struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); + struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); + struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); + int inner_loop; + + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); + x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); + x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); + x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); + x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); + } + /* while --num_inputs */ + x86_dec( func, num_inputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_input ); +} + +static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_output; + struct x86_reg aos_output; + struct x86_reg num_outputs; + struct x86_reg temp; + int inner_loop; + + soa_output = x86_make_reg( file_REG32, reg_AX ); + aos_output = x86_make_reg( file_REG32, reg_BX ); + num_outputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, aos_output ); + + x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); + x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); + x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, x86_fn_arg( func, stride ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + x86_lea( func, aos_output, x86_make_disp(aos_output, 16) ); + x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); + } + /* while --num_outputs */ + x86_dec( func, num_outputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_output ); +} + +/** + * Translate a TGSI vertex/fragment shader to SSE2 code. + * Slightly different things are done for vertex vs. fragment shaders. + * + * Note that fragment shaders are responsible for interpolating shader + * inputs. Because on x86 we have only 4 GP registers, and here we + * have 5 shader arguments (input, output, const, temp and coef), the + * code is split into two phases -- DECLARATION and INSTRUCTION phase. + * GP register holding the output argument is aliased with the coeff + * argument, as outputs are not needed in the DECLARATION phase. + * + * \param tokens the TGSI input shader + * \param func the output SSE code/function + * \param immediates buffer to place immediates, later passed to SSE func + * \param return 1 for success, 0 if translation failed + */ +unsigned +tgsi_emit_sse2( + const struct tgsi_token *tokens, + struct x86_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + struct tgsi_parse_context parse; + boolean instruction_phase = FALSE; + unsigned ok = 1; + uint num_immediates = 0; + + func->csr = func->store; + + tgsi_parse_init( &parse, tokens ); + + /* Can't just use EDI, EBX without save/restoring them: + */ + x86_push( + func, + get_immediate_base() ); + + x86_push( + func, + get_temp_base() ); + + + /* + * Different function args for vertex/fragment shaders: + */ + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + /* DECLARATION phase, do not load output argument. */ + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + /* skipping outputs argument here */ + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_coef_base(), + x86_fn_arg( func, 5 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 6 ) ); + } + else { + assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + + if (do_swizzles) + aos_to_soa( func, + 6, /* aos_input */ + 1, /* machine->input */ + 7, /* num_inputs */ + 8 ); /* input_stride */ + + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 5 ) ); + } + + while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration( + func, + &parse.FullToken.FullDeclaration ); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + } + } + + ok = emit_instruction( + func, + &parse.FullToken.FullInstruction ); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? + "vertex shader" : "fragment shader"); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } +#if 0 + debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", + num_immediates, + immediates[num_immediates][0], + immediates[num_immediates][1], + immediates[num_immediates][2], + immediates[num_immediates][3]); +#endif + num_immediates++; + } + break; + + default: + ok = 0; + assert( 0 ); + } + } + + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + if (do_swizzles) + soa_to_aos( func, 9, 2, 10, 11 ); + } + + /* Can't just use EBX, EDI without save/restoring them: + */ + x86_pop( + func, + get_temp_base() ); + + x86_pop( + func, + get_immediate_base() ); + + emit_ret( func ); + + tgsi_parse_free( &parse ); + + return ok; +} + +#endif /* PIPE_ARCH_X86 */ -- cgit v1.2.3 From faad6655946968dd16ab30cc8d5fbd5a09321976 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Aug 2008 12:00:57 -0600 Subject: gallium: distinguish between KIL and KILP Note: KIL (unconditional) not done yet. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 0cb1f11ef2..f3a202ae89 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1002,7 +1002,7 @@ emit_store( */ static void -emit_kil( +emit_kilp( struct x86_function *func, const struct tgsi_full_src_register *reg ) { @@ -1096,6 +1096,15 @@ emit_kil( x86_make_reg( file_REG32, reg_AX ) ); } + +static void +emit_kil( + struct x86_function *func ) +{ + /* XXX todo / fix me */ +} + + static void emit_setcc( struct x86_function *func, @@ -1609,8 +1618,15 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_KILP: + /* predicated kill */ + emit_kilp( func, &inst->FullSrcRegisters[0] ); + break; + case TGSI_OPCODE_KIL: - emit_kil( func, &inst->FullSrcRegisters[0] ); + /* unconditional kill */ + emit_kil( func ); + return 0; /* XXX fix me */ break; case TGSI_OPCODE_PK2H: -- cgit v1.2.3 From f0874d1a6b832e1bb29256661cb8beab3ddeb528 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 11:09:20 +0200 Subject: tgsi: Swap meanings of KIL and KILP opcodes. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 22 ++++++++++++++++++---- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 12 ++++++------ 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index b93f3d5222..c4ba667d32 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1189,8 +1189,8 @@ store_dest( * Kill fragment if any of the four values is less than zero. */ static void -exec_kilp(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_kil(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { uint uniquemask; uint chan_index; @@ -1226,6 +1226,21 @@ exec_kilp(struct tgsi_exec_machine *mach, mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +/** + * Execute NVIDIA-style KIL which is predicated by a condition code. + * Kill fragment if the condition code is TRUE. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + + /* TODO: build kilmask from CC mask */ + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + /* * Fetch a texel using STR texture coordinates. @@ -1971,8 +1986,7 @@ exec_instruction( break; case TGSI_OPCODE_KIL: - /* for enabled ExecMask bits, set the killed bit */ - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + exec_kil (mach, inst); break; case TGSI_OPCODE_PK2H: diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index f3a202ae89..47dc06faf6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1002,7 +1002,7 @@ emit_store( */ static void -emit_kilp( +emit_kil( struct x86_function *func, const struct tgsi_full_src_register *reg ) { @@ -1098,7 +1098,7 @@ emit_kilp( static void -emit_kil( +emit_kilp( struct x86_function *func ) { /* XXX todo / fix me */ @@ -1620,13 +1620,13 @@ emit_instruction( case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( func, &inst->FullSrcRegisters[0] ); + emit_kilp( func ); + return 0; /* XXX fix me */ break; case TGSI_OPCODE_KIL: - /* unconditional kill */ - emit_kil( func ); - return 0; /* XXX fix me */ + /* conditional kill */ + emit_kil( func, &inst->FullSrcRegisters[0] ); break; case TGSI_OPCODE_PK2H: -- cgit v1.2.3 From 93305bd6808787964c0088a88c428ecd1208aa44 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 18 Aug 2008 18:09:23 +0200 Subject: tgsi: Use NUM_CHANNELS. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 47dc06faf6..0bf260c4dd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -44,13 +44,13 @@ #define FOR_EACH_CHANNEL( CHAN )\ - for( CHAN = 0; CHAN < 4; CHAN++ ) + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - if( IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ FOR_EACH_CHANNEL( CHAN )\ @@ -1172,7 +1172,7 @@ emit_instruction( { unsigned chan_index; - switch( inst->Instruction.Opcode ) { + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: #if 0 /* XXX this isn't working properly (see glean vertProg1 test) */ -- cgit v1.2.3 From de3083be7197cccb8dbf223d644ebdb78a8c809d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 19 Aug 2008 00:42:30 +0200 Subject: tgsi: Fix ARL opcode in SSE2 codegen. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 6 +-- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 87 +++++++++++++++++++++++++--------- 2 files changed, 68 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index a2235de059..b793e1ebae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1688,9 +1688,9 @@ exec_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_f2it( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + FETCH( &r[0], 0, chan_index ); + micro_f2it( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 0bf260c4dd..ca48d422e0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -62,6 +62,7 @@ #define CHAN_W 3 #define TEMP_R0 TGSI_EXEC_TEMP_R0 +#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR /** * X86 utility functions. @@ -215,19 +216,61 @@ emit_ret( static void emit_const( struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_const( vec, chan ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); + uint xmm, + int vec, + uint chan, + uint indirect, + uint indirectFile, + int indirectIndex ) +{ + if (indirect) { + struct x86_reg r0 = get_input_base(); + struct x86_reg r1 = get_output_base(); + uint i; + + assert( indirectFile == TGSI_FILE_ADDRESS ); + assert( indirectIndex == 0 ); + + x86_push( func, r0 ); + x86_push( func, r1 ); + + for (i = 0; i < QUAD_SIZE; i++) { + x86_lea( func, r0, get_const( vec, chan ) ); + x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) ); + + /* Quick hack to multiply by 16 -- need to add SHL to rtasm. + */ + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + + x86_add( func, r0, r1 ); + x86_mov( func, r1, x86_deref( r0 ) ); + x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 ); + } + + x86_pop( func, r1 ); + x86_pop( func, r0 ); + + sse_movaps( + func, + make_xmm( xmm ), + get_temp( TEMP_R0, CHAN_X ) ); + } + else { + assert( vec >= 0 ); + + sse_movss( + func, + make_xmm( xmm ), + get_const( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); + } } static void @@ -369,10 +412,12 @@ emit_addrs( unsigned vec, unsigned chan ) { + assert( vec == 0 ); + emit_temps( func, xmm, - vec + TGSI_EXEC_NUM_TEMPS, + vec + TGSI_EXEC_TEMP_ADDR, chan ); } @@ -855,18 +900,21 @@ emit_fetch( { unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - switch( swizzle ) { + switch (swizzle) { case TGSI_EXTSWIZZLE_X: case TGSI_EXTSWIZZLE_Y: case TGSI_EXTSWIZZLE_Z: case TGSI_EXTSWIZZLE_W: - switch( reg->SrcRegister.File ) { + switch (reg->SrcRegister.File) { case TGSI_FILE_CONSTANT: emit_const( func, xmm, reg->SrcRegister.Index, - swizzle ); + swizzle, + reg->SrcRegister.Indirect, + reg->SrcRegisterInd.File, + reg->SrcRegisterInd.Index ); break; case TGSI_FILE_IMMEDIATE: @@ -1174,16 +1222,11 @@ emit_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: -#if 0 - /* XXX this isn't working properly (see glean vertProg1 test) */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } -#else - return 0; -#endif break; case TGSI_OPCODE_MOV: -- cgit v1.2.3 From 0d9d2045e86a249a36d8eeebf59979a162c164af Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 19 Aug 2008 11:47:30 +0200 Subject: tgsi: Implement EXP opcode for SSE2. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 8 ++--- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 64 ++++++++++++++++++++++++++-------- 2 files changed, 53 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index b793e1ebae..d584a4e4fa 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1756,18 +1756,18 @@ exec_instruction( micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ - STORE( &r[2], 0, CHAN_X ); /* store r2 */ + STORE( &r[2], 0, CHAN_X ); /* store r2 */ } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ - STORE( &r[2], 0, CHAN_Y ); /* store r2 */ + STORE( &r[2], 0, CHAN_Y ); /* store r2 */ } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ - STORE( &r[2], 0, CHAN_Z ); /* store r2 */ + STORE( &r[2], 0, CHAN_Z ); /* store r2 */ } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index ca48d422e0..684bc081e5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -61,6 +61,9 @@ #define CHAN_Z 2 #define CHAN_W 3 +#define TEMP_ONE_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_ONE_C TGSI_EXEC_TEMP_ONE_C + #define TEMP_R0 TGSI_EXEC_TEMP_R0 #define TEMP_ADDR TGSI_EXEC_TEMP_ADDR @@ -958,8 +961,8 @@ emit_fetch( emit_tempf( func, xmm, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); break; default: @@ -1173,8 +1176,8 @@ emit_setcc( func, make_xmm( 0 ), get_temp( - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ) ); + TEMP_ONE_I, + TEMP_ONE_C ) ); STORE( func, *inst, 0, 0, chan_index ); } } @@ -1243,8 +1246,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C); + TEMP_ONE_I, + TEMP_ONE_C); if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { STORE( func, *inst, 0, 0, CHAN_X ); } @@ -1329,7 +1332,38 @@ emit_instruction( break; case TGSI_OPCODE_EXP: - return 0; + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( func, *inst, 0, 0, CHAN_X ); + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_MOV( func, 1, 0 ); + emit_flr( func, 1 ); + /* dst.x = ex2(floor(src.x)) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { + emit_MOV( func, 2, 1 ); + emit_ex2( func, 2 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + /* dst.y = src.x - floor(src.x) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_MOV( func, 2, 0 ); + emit_sub( func, 2, 1 ); + STORE( func, *inst, 2, 0, CHAN_Y ); + } + } + /* dst.z = ex2(src.x) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + emit_ex2( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { + emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } break; case TGSI_OPCODE_LOG: @@ -1399,8 +1433,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); STORE( func, *inst, 0, 0, CHAN_X ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { @@ -1603,8 +1637,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); STORE( func, *inst, 0, 0, CHAN_W ); } break; @@ -1731,8 +1765,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1820,8 +1854,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); STORE( func, *inst, 0, 0, CHAN_W ); } break; -- cgit v1.2.3 From 4405e428e4c2a80172c803cc3c4933d546bf2b4d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 19 Aug 2008 12:07:25 +0200 Subject: tgsi: Implement LOG opcode for SSE2 codegen. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 8 ++++---- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index d584a4e4fa..88a34a6961 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1777,18 +1777,18 @@ exec_instruction( micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[0], 0, CHAN_X ); + STORE( &r[0], 0, CHAN_X ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ - STORE( &r[0], 0, CHAN_Y ); + STORE( &r[0], 0, CHAN_Y ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[1], 0, CHAN_Z ); + STORE( &r[1], 0, CHAN_Z ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 684bc081e5..485e5a0e6f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1367,7 +1367,38 @@ emit_instruction( break; case TGSI_OPCODE_LOG: - return 0; + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_abs( func, 0 ); + emit_MOV( func, 1, 0 ); + emit_lg2( func, 1 ); + /* dst.z = lg2(abs(src.x)) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( func, *inst, 1, 0, CHAN_Z ); + } + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_flr( func, 1 ); + /* dst.x = floor(lg2(abs(src.x))) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( func, *inst, 1, 0, CHAN_X ); + } + /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_ex2( func, 1 ); + emit_rcp( func, 1, 1 ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + } + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { + emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } break; case TGSI_OPCODE_MUL: -- cgit v1.2.3 From 1c2ff4d9e65563c071747a9c3bd907bd24706da0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 15:16:43 -0600 Subject: gallium: use new util_fast_exp2(), _log2(), pow() functions New code surrounded with #if FAST_MATH to allow comparing against original code if we need to debug. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 66 +++++++++++++++++++++++++++++++- src/gallium/auxiliary/tgsi/tgsi_exec.c | 30 +++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_sse2.c | 19 +++++++++ 3 files changed, 113 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 441877d46f..41bdd012d5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -31,6 +31,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" @@ -43,6 +44,7 @@ #ifdef PIPE_ARCH_X86 #define DISASSEM 0 +#define FAST_MATH 1 static const char *files[] = { @@ -1380,14 +1382,28 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } + + /* A wrapper for powf(). * Makes sure it is cdecl and operates on floats. */ static float PIPE_CDECL _powerf( float x, float y ) { +#if FAST_MATH + return util_fast_pow(x, y); +#else return powf( x, y ); +#endif } +#if FAST_MATH +static float PIPE_CDECL _exp2(float x) +{ + return util_fast_exp2(x); +} +#endif + + /* Really not sufficient -- need to check for conditions that could * generate inf/nan values, which will slow things down hugely. */ @@ -1442,6 +1458,48 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst } +#if FAST_MATH +static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + uint i; + + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } + + /* Push caller-save (ie scratch) regs. + */ + x86_cdecl_caller_push_regs( cp->func ); + + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) ); + + x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); + + /* tmp_EAX has been pushed & will be restored below */ + x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _exp2 ); + x86_call( cp->func, cp->tmp_EAX ); + + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 4) ); + + x86_cdecl_caller_pop_regs( cp->func ); + + /* Note retval on x87 stack: + */ + cp->func->x87_stack++; + + x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + + return TRUE; +} +#endif + + static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); @@ -1662,7 +1720,9 @@ emit_instruction( struct aos_compilation *cp, return emit_RND(cp, inst); case TGSI_OPCODE_EXPBASE2: -#if 0 +#if FAST_MATH + return emit_EXPBASE2(cp, inst); +#elif 0 /* this seems to fail for "larger" exponents. * See glean tvertProg1's EX2 test. */ @@ -1827,6 +1887,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, struct aos_compilation cp; unsigned fixup, label; + util_init_math(); + tgsi_parse_init( &parse, varient->base.vs->state.tokens ); memset(&cp, 0, sizeof(cp)); @@ -2135,4 +2197,4 @@ struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, -#endif +#endif /* PIPE_ARCH_X86 */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 88a34a6961..e28b56c842 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -57,6 +57,9 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" +#include "util/u_math.h" + +#define FAST_MATH 1 #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 @@ -145,6 +148,8 @@ tgsi_exec_machine_bind_shader( tgsi_dump(tokens, 0); #endif + util_init_math(); + mach->Tokens = tokens; mach->Samplers = samplers; @@ -448,10 +453,17 @@ micro_exp2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { +#if FAST_MATH + dst->f[0] = util_fast_exp2( src->f[0] ); + dst->f[1] = util_fast_exp2( src->f[1] ); + dst->f[2] = util_fast_exp2( src->f[2] ); + dst->f[3] = util_fast_exp2( src->f[3] ); +#else dst->f[0] = powf( 2.0f, src->f[0] ); dst->f[1] = powf( 2.0f, src->f[1] ); dst->f[2] = powf( 2.0f, src->f[2] ); dst->f[3] = powf( 2.0f, src->f[3] ); +#endif } static void @@ -528,10 +540,17 @@ micro_lg2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { +#if FAST_MATH + dst->f[0] = util_fast_log2( src->f[0] ); + dst->f[1] = util_fast_log2( src->f[1] ); + dst->f[2] = util_fast_log2( src->f[2] ); + dst->f[3] = util_fast_log2( src->f[3] ); +#else dst->f[0] = logf( src->f[0] ) * 1.442695f; dst->f[1] = logf( src->f[1] ) * 1.442695f; dst->f[2] = logf( src->f[2] ) * 1.442695f; dst->f[3] = logf( src->f[3] ) * 1.442695f; +#endif } static void @@ -796,10 +815,17 @@ micro_pow( const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { +#if FAST_MATH + dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); + dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); + dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); + dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); +#else dst->f[0] = powf( src0->f[0], src1->f[0] ); dst->f[1] = powf( src0->f[1], src1->f[1] ); dst->f[2] = powf( src0->f[2], src1->f[2] ); dst->f[3] = powf( src0->f[3], src1->f[3] ); +#endif } static void @@ -2024,7 +2050,11 @@ exec_instruction( /* TGSI_OPCODE_EX2 */ FETCH(&r[0], 0, CHAN_X); +#if FAST_MATH + micro_exp2( &r[0], &r[0] ); +#else micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); +#endif FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 485e5a0e6f..e390607023 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -27,6 +27,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" @@ -42,6 +43,8 @@ */ #define HIGH_PRECISION 1 +#define FAST_MATH 1 + #define FOR_EACH_CHANNEL( CHAN )\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) @@ -623,10 +626,17 @@ ex24f( { const unsigned X = 0; +#if FAST_MATH + store[X + 0] = util_fast_exp2( store[X + 0] ); + store[X + 1] = util_fast_exp2( store[X + 1] ); + store[X + 2] = util_fast_exp2( store[X + 2] ); + store[X + 3] = util_fast_exp2( store[X + 3] ); +#else store[X + 0] = powf( 2.0f, store[X + 0] ); store[X + 1] = powf( 2.0f, store[X + 1] ); store[X + 2] = powf( 2.0f, store[X + 2] ); store[X + 3] = powf( 2.0f, store[X + 3] ); +#endif } static void @@ -762,10 +772,17 @@ pow4f( { const unsigned X = 0; +#if FAST_MATH + store[X + 0] = util_fast_pow( store[X + 0], store[X + 4] ); + store[X + 1] = util_fast_pow( store[X + 1], store[X + 5] ); + store[X + 2] = util_fast_pow( store[X + 2], store[X + 6] ); + store[X + 3] = util_fast_pow( store[X + 3], store[X + 7] ); +#else store[X + 0] = powf( store[X + 0], store[X + 4] ); store[X + 1] = powf( store[X + 1], store[X + 5] ); store[X + 2] = powf( store[X + 2], store[X + 6] ); store[X + 3] = powf( store[X + 3], store[X + 7] ); +#endif } static void @@ -2235,6 +2252,8 @@ tgsi_emit_sse2( unsigned ok = 1; uint num_immediates = 0; + util_init_math(); + func->csr = func->store; tgsi_parse_init( &parse, tokens ); -- cgit v1.2.3 From 1a46dcc8a927dfb38ca1381e7b3dafb789f8257c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 15:25:21 -0600 Subject: gallium: replace LOG2() macro with util_fast_log2() inline func --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 8 ++++---- src/gallium/drivers/softpipe/sp_context.c | 3 +++ src/gallium/drivers/softpipe/sp_tex_sample.c | 3 ++- src/gallium/include/pipe/p_util.h | 15 --------------- 4 files changed, 9 insertions(+), 20 deletions(-) (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index e390607023..00ed4da450 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -713,10 +713,10 @@ lg24f( { const unsigned X = 0; - store[X + 0] = LOG2( store[X + 0] ); - store[X + 1] = LOG2( store[X + 1] ); - store[X + 2] = LOG2( store[X + 2] ); - store[X + 3] = LOG2( store[X + 3] ); + store[X + 0] = util_fast_log2( store[X + 0] ); + store[X + 1] = util_fast_log2( store[X + 1] ); + store[X + 2] = util_fast_log2( store[X + 2] ); + store[X + 3] = util_fast_log2( store[X + 3] ); } static void diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 626c3a9d4e..9b1313bc83 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" +#include "util/u_math.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" @@ -128,6 +129,8 @@ softpipe_create( struct pipe_screen *screen, struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); uint i; + util_init_math(); + #ifdef PIPE_ARCH_X86 softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE ); #else diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 01f4d0ca81..d7680ffa81 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -41,6 +41,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "tgsi/tgsi_exec.h" +#include "util/u_math.h" /* @@ -499,7 +500,7 @@ compute_lambda(struct tgsi_sampler *sampler, rho = MAX2(rho, max); } - lambda = LOG2(rho); + lambda = util_fast_log2(rho); lambda += lodbias + sampler->state->lod_bias; lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod); diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 473a8d94ab..660192b28e 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -443,21 +443,6 @@ static INLINE int iround(float f) #define FABSF(x) ((float) fabs(x)) #endif -/* Pretty fast, and accurate. - * Based on code from http://www.flipcode.com/totd/ - */ -static INLINE float LOG2(float val) -{ - union fi num; - int log_2; - - num.f = val; - log_2 = ((num.i >> 23) & 255) - 128; - num.i &= ~(255 << 23); - num.i += 127 << 23; - num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3; - return num.f + log_2; -} #if defined(__GNUC__) #define CEILF(x) ceilf(x) -- cgit v1.2.3 From 4f25420bdd834e81a3e22733304efc5261c2998a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sun, 24 Aug 2008 17:48:55 -0600 Subject: gallium: refactor/replace p_util.h with util/u_memory.h and util/u_math.h Also, rename p_tile.[ch] to u_tile.[ch] --- src/gallium/README.portability | 4 +- src/gallium/auxiliary/cso_cache/cso_cache.c | 3 +- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- src/gallium/auxiliary/cso_cache/cso_hash.c | 2 +- src/gallium/auxiliary/draw/draw_context.c | 3 +- src/gallium/auxiliary/draw/draw_pipe.c | 1 - src/gallium/auxiliary/draw/draw_pipe_aaline.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_clip.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_cull.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_offset.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 6 +- src/gallium/auxiliary/draw/draw_pipe_twoside.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_util.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_validate.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 3 +- src/gallium/auxiliary/draw/draw_pt.c | 1 - src/gallium/auxiliary/draw/draw_pt_emit.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 3 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 +- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 2 +- src/gallium/auxiliary/draw/draw_pt_util.c | 1 - src/gallium/auxiliary/draw/draw_pt_varray.c | 4 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 +- src/gallium/auxiliary/draw/draw_vbuf.h | 2 - src/gallium/auxiliary/draw/draw_vs.c | 6 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 4 +- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 3 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 3 +- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 - src/gallium/auxiliary/draw/draw_vs_sse.c | 3 +- src/gallium/auxiliary/draw/draw_vs_varient.c | 3 +- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 3 +- src/gallium/auxiliary/gallivm/instructions.cpp | 2 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 2 +- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 2 +- .../auxiliary/pipebuffer/pb_buffer_malloc.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 2 +- .../auxiliary/pipebuffer/pb_bufmgr_fenced.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_validate.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_winsys.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 2 +- src/gallium/auxiliary/sct/sct.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_build.c | 1 - src/gallium/auxiliary/tgsi/tgsi_build.h | 4 + src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 1 - src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_parse.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_scan.c | 6 +- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_transform.c | 1 + src/gallium/auxiliary/tgsi/tgsi_transform.h | 1 - src/gallium/auxiliary/tgsi/tgsi_util.c | 1 - src/gallium/auxiliary/translate/translate.c | 1 - src/gallium/auxiliary/translate/translate_cache.c | 2 +- .../auxiliary/translate/translate_generic.c | 2 +- src/gallium/auxiliary/translate/translate_sse.c | 2 +- src/gallium/auxiliary/util/Makefile | 2 +- src/gallium/auxiliary/util/SConscript | 2 +- src/gallium/auxiliary/util/p_debug.c | 1 - src/gallium/auxiliary/util/u_blit.c | 5 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- src/gallium/auxiliary/util/u_handle_table.c | 4 +- src/gallium/auxiliary/util/u_hash_table.c | 5 +- src/gallium/auxiliary/util/u_math.h | 240 +++- src/gallium/auxiliary/util/u_memory.h | 222 ++++ src/gallium/auxiliary/util/u_mm.c | 2 +- src/gallium/auxiliary/util/u_pack_color.h | 36 +- src/gallium/auxiliary/util/u_pointer.h | 107 ++ src/gallium/auxiliary/util/u_rect.c | 1 - src/gallium/auxiliary/util/u_simple_shaders.c | 2 +- src/gallium/auxiliary/util/u_tile.c | 1169 ++++++++++++++++++++ src/gallium/auxiliary/util/u_tile.h | 101 ++ src/gallium/drivers/cell/common.h | 1 - src/gallium/drivers/cell/ppu/cell_clear.c | 2 +- src/gallium/drivers/cell/ppu/cell_context.c | 2 +- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- src/gallium/drivers/cell/ppu/cell_render.c | 2 +- src/gallium/drivers/cell/ppu/cell_screen.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_derived.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 +- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 2 +- src/gallium/drivers/cell/ppu/cell_winsys.c | 2 +- src/gallium/drivers/cell/spu/spu_exec.c | 1 - src/gallium/drivers/cell/spu/spu_tri.c | 1 - src/gallium/drivers/cell/spu/spu_util.c | 1 - src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 1 - src/gallium/drivers/cell/spu/spu_vertex_shader.c | 1 - src/gallium/drivers/failover/fo_context.c | 2 +- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_debug_fp.c | 2 +- src/gallium/drivers/i915simple/i915_fpc.h | 1 - .../drivers/i915simple/i915_fpc_translate.c | 2 + src/gallium/drivers/i915simple/i915_prim_emit.c | 4 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 3 +- src/gallium/drivers/i915simple/i915_screen.c | 2 +- src/gallium/drivers/i915simple/i915_state.c | 3 +- .../drivers/i915simple/i915_state_derived.c | 2 +- .../drivers/i915simple/i915_state_dynamic.c | 4 +- .../drivers/i915simple/i915_state_immediate.c | 2 +- .../drivers/i915simple/i915_state_sampler.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 3 +- src/gallium/drivers/i915simple/i915_texture.c | 3 +- src/gallium/drivers/i965simple/brw_cc.c | 6 +- src/gallium/drivers/i965simple/brw_clip_state.c | 3 +- src/gallium/drivers/i965simple/brw_context.c | 2 +- src/gallium/drivers/i965simple/brw_curbe.c | 3 +- src/gallium/drivers/i965simple/brw_draw_upload.c | 1 + src/gallium/drivers/i965simple/brw_gs_state.c | 3 +- src/gallium/drivers/i965simple/brw_screen.c | 2 +- src/gallium/drivers/i965simple/brw_sf_state.c | 5 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_state_batch.c | 2 +- src/gallium/drivers/i965simple/brw_state_cache.c | 2 +- src/gallium/drivers/i965simple/brw_state_pool.c | 3 +- src/gallium/drivers/i965simple/brw_state_upload.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 3 +- src/gallium/drivers/i965simple/brw_tex_layout.c | 8 +- src/gallium/drivers/i965simple/brw_vs_state.c | 3 +- src/gallium/drivers/i965simple/brw_wm.c | 2 +- src/gallium/drivers/i965simple/brw_wm_decl.c | 3 +- src/gallium/drivers/i965simple/brw_wm_glsl.c | 3 +- .../drivers/i965simple/brw_wm_sampler_state.c | 3 +- src/gallium/drivers/i965simple/brw_wm_state.c | 3 +- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_fs_exec.c | 2 +- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 2 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 1 + src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 2 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 29 +- src/gallium/drivers/softpipe/sp_quad_bufloop.c | 2 +- src/gallium/drivers/softpipe/sp_quad_colormask.c | 3 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 2 +- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 2 +- src/gallium/drivers/softpipe/sp_quad_earlyz.c | 2 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 3 +- src/gallium/drivers/softpipe/sp_quad_occlusion.c | 2 +- src/gallium/drivers/softpipe/sp_quad_output.c | 2 +- src/gallium/drivers/softpipe/sp_quad_stencil.c | 2 +- src/gallium/drivers/softpipe/sp_quad_stipple.c | 2 +- src/gallium/drivers/softpipe/sp_query.c | 2 +- src/gallium/drivers/softpipe/sp_screen.c | 2 +- src/gallium/drivers/softpipe/sp_setup.c | 2 +- src/gallium/drivers/softpipe/sp_state_blend.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 3 +- src/gallium/drivers/softpipe/sp_state_fs.c | 2 +- src/gallium/drivers/softpipe/sp_state_rasterizer.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 3 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- src/gallium/drivers/softpipe/sp_texture.c | 3 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 4 +- src/gallium/drivers/trace/tr_context.c | 2 +- src/gallium/drivers/trace/tr_dump.c | 2 + src/gallium/drivers/trace/tr_dump.h | 1 - src/gallium/drivers/trace/tr_screen.c | 2 +- src/gallium/drivers/trace/tr_state.c | 1 + src/gallium/drivers/trace/tr_stream_stdc.c | 2 +- src/gallium/drivers/trace/tr_stream_wd.c | 2 +- src/gallium/drivers/trace/tr_texture.c | 2 +- src/gallium/drivers/trace/tr_winsys.c | 3 +- src/gallium/include/pipe/p_util.h | 460 -------- src/gallium/state_trackers/python/gallium.i | 2 +- src/gallium/state_trackers/python/st_device.c | 3 +- src/gallium/state_trackers/python/st_sample.c | 5 +- .../state_trackers/python/st_softpipe_winsys.c | 3 +- .../winsys/drm/intel/common/intel_be_device.c | 2 +- .../winsys/drm/intel/dri/intel_winsys_softpipe.c | 2 +- src/gallium/winsys/egl_xlib/egl_xlib.c | 2 +- src/gallium/winsys/egl_xlib/sw_winsys.c | 3 +- src/gallium/winsys/gdi/wmesa.c | 2 +- src/gallium/winsys/xlib/brw_aub.c | 1 - src/gallium/winsys/xlib/xm_winsys.c | 3 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 2 +- src/mesa/state_tracker/acc2.c | 319 ++++++ src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_cb_bitmap.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_cb_texture.c | 2 +- src/mesa/state_tracker/st_program.c | 2 +- src/mesa/state_tracker/st_texture.c | 1 - 201 files changed, 2453 insertions(+), 686 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_memory.h create mode 100644 src/gallium/auxiliary/util/u_pointer.h create mode 100644 src/gallium/auxiliary/util/u_tile.c create mode 100644 src/gallium/auxiliary/util/u_tile.h delete mode 100644 src/gallium/include/pipe/p_util.h create mode 100644 src/mesa/state_tracker/acc2.c (limited to 'src/gallium/auxiliary/tgsi/tgsi_sse2.c') diff --git a/src/gallium/README.portability b/src/gallium/README.portability index d5d5987a7f..adecf4bb79 100644 --- a/src/gallium/README.portability +++ b/src/gallium/README.portability @@ -35,8 +35,8 @@ not available in Windows Kernel Mode. Use the appropriate p_*.h include. * Use MALLOC, CALLOC, FREE instead of the malloc, calloc, free functions. -* Use align_pointer() function defined in p_util.h for aligning pointers in a -portable way. +* Use align_pointer() function defined in u_memory.h for aligning pointers + in a portable way. == Debugging == diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 36dc46ff80..6b1754ea00 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -28,9 +28,10 @@ /* Authors: Zack Rusin */ -#include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "util/u_memory.h" + #include "cso_cache.h" #include "cso_hash.h" diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 86e4d46a20..f22ba40824 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -36,7 +36,7 @@ */ #include "pipe/p_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 0646efd952..7f0044c5a7 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -31,7 +31,7 @@ */ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cso_hash.h" diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 2f263cf06a..1c26cb31a3 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -31,7 +31,8 @@ */ -#include "pipe/p_util.h" +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw_context.h" #include "draw_vbuf.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 1db43876ef..3cde9d36d3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -30,7 +30,6 @@ * Keith Whitwell */ -#include "pipe/p_util.h" #include "draw/draw_private.h" #include "draw/draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 991304b2c8..20841bb5d6 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -32,11 +32,12 @@ */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index c7f4349cb3..2c1cacbdb4 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -38,7 +38,6 @@ */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" @@ -47,6 +46,9 @@ #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "draw_context.h" #include "draw_vs.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index fa10f8efca..3265dcd154 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -32,7 +32,9 @@ */ -#include "pipe/p_util.h" +#include "util/u_memory.h" +#include "util/u_math.h" + #include "pipe/p_shader_tokens.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index d0d22a38e0..053be5f050 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -33,7 +33,7 @@ */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 4741b22d02..43d1fecc4d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -28,7 +28,9 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "pipe/p_shader_tokens.h" #include "draw_vs.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 2f5865741c..1fea5e6dcb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -32,7 +32,8 @@ * \author Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index e97136fa1f..b764d9c518 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -34,12 +34,14 @@ */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index bf0db18a68..b65e2aa102 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -36,10 +36,12 @@ */ -#include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_pipe.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "draw/draw_pipe.h" /** Subclass of draw_stage */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 3ac825f565..c329d92339 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -28,7 +28,8 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index 8f97fdedaa..68835fd1a5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -33,7 +33,7 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c index 04438f4dd0..e22e5fed0c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_util.c +++ b/src/gallium/auxiliary/draw/draw_pipe_util.c @@ -30,7 +30,7 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_private.h" #include "draw/draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index 6be1d369c3..f34c68728e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -28,7 +28,7 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index a6fde77a0e..c0cf4269db 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -35,7 +35,8 @@ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw_vbuf.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 48ec2f1239..184e363594 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -28,9 +28,10 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw_private.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 54590984c6..4f1326053d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -28,7 +28,8 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 85a75525c8..669c11c993 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -30,7 +30,6 @@ * Keith Whitwell */ -#include "pipe/p_util.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 40f05cb9e0..d4eca80588 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 07f4c99164..6377f896fb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 4a1f3b0953..0684c93d10 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -30,7 +30,7 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index fdf9b6fe6a..87094f3092 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -31,7 +31,8 @@ */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index be3535ed9e..f617aac9f7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -25,7 +25,8 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index af6306b1c6..96dc706b99 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_context.h" #include "draw/draw_context.h" #include "draw/draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index 32c8a9632c..3bc7939c55 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -30,7 +30,6 @@ * Keith Whitwell */ -#include "pipe/p_util.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 46e722a154..c15afe65f1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -25,7 +25,9 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index cda2987c9e..b8b5de729d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -30,7 +30,7 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index e90f37872a..62247ccd9f 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -37,8 +37,6 @@ #define DRAW_VBUF_H_ -#include "pipe/p_util.h" - struct draw_context; struct vertex_info; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index f798b20492..34adbd49b0 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -31,11 +31,15 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "pipe/p_shader_tokens.h" + #include "draw_private.h" #include "draw_context.h" #include "draw_vs.h" + #include "translate/translate.h" #include "translate/translate_cache.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 41bdd012d5..760fcb389f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -29,9 +29,9 @@ */ -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" #include "util/u_math.h" +#include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index eda677cc62..ab3c5b94a5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index e029b7b4bb..b358bd2df4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -29,8 +29,9 @@ #include "pipe/p_config.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index e26903d8cc..44563803f9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -31,7 +31,8 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index fc03473b91..2ce30b9a02 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -32,7 +32,6 @@ * Brian Paul */ -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" #include "draw_context.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 61f0c084c3..0efabd9de8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -31,13 +31,14 @@ * Brian Paul */ +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_config.h" #include "draw_vs.h" #if defined(PIPE_ARCH_X86) -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 994ce3e889..4daf05dae7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -30,7 +30,8 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index cf5b978837..e64bfb1c6c 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -41,11 +41,12 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_util.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" +#include "util/u_memory.h" + #include #include #include diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 035224e8f3..a82dc30306 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -35,7 +35,7 @@ #include "storage.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include #include diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 76049ade7c..efddc04e81 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -29,7 +29,7 @@ #include "storagesoa.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include #include diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index ce41418a0f..8ae052e875 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -45,7 +45,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index e90d2e5623..20fc87b39d 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -35,7 +35,7 @@ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c index 0d2d6c0c1b..2afaeafa1a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index bed4bec4fe..b914c2d0fe 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -38,7 +38,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index d02e3500ff..5e518370d0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -37,7 +37,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index 05efd8ce41..8fc63ce648 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -35,7 +35,7 @@ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index c51e582611..b40eb6cc90 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -36,7 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_mm.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 95af08929a..93d2cc9635 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -39,7 +39,7 @@ #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 598d9ce310..af307e265a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -39,7 +39,7 @@ #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 362fd896f3..1e54fc39d4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_debug.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c index 978944091f..28d137dbc4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -35,7 +35,7 @@ #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 300c1c2d9d..dfa5c35ab6 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -33,7 +33,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "rtasm_execmem.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 7f6bf577b2..285ddc0e3f 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -30,7 +30,7 @@ */ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "rtasm_ppc_spe.h" #ifdef GALLIUM_CELL diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c index 5e4126e014..49bb7ea92e 100644 --- a/src/gallium/auxiliary/sct/sct.c +++ b/src/gallium/auxiliary/sct/sct.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" #include "sct.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 050b448fe7..74614d3688 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 6ae7f324f8..7d6234746a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -28,6 +28,10 @@ #ifndef TGSI_BUILD_H #define TGSI_BUILD_H + +struct tgsi_token; + + #if defined __cplusplus extern "C" { #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 1025866a25..be25cb45a0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "util/u_string.h" #include "tgsi_dump_c.h" #include "tgsi_build.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index e28b56c842..fb573fe1f0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -52,11 +52,11 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" +#include "util/u_memory.h" #include "util/u_math.h" #define FAST_MATH 1 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index d16f0cdcad..3757486ba9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -26,10 +26,10 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" +#include "util/u_memory.h" void tgsi_full_token_init( diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 59bcf10b53..be4870a498 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -33,11 +33,11 @@ */ -#include "tgsi_scan.h" -#include "tgsi/tgsi_parse.h" +#include "util/u_math.h" #include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" -#include "pipe/p_util.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 00ed4da450..626724ad4e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index 357f77b05a..ea87da31e5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -31,6 +31,7 @@ * Authors: Brian Paul */ +#include "pipe/p_debug.h" #include "tgsi_transform.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h index 3da0b38271..a121adbaef 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -29,7 +29,6 @@ #define TGSI_TRANSFORM_H -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_build.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 09486e649e..50101a9bb0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index b93fbf9033..7678903f75 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -31,7 +31,6 @@ */ #include "pipe/p_config.h" -#include "pipe/p_util.h" #include "pipe/p_state.h" #include "translate.h" diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c index 115dc9287e..d8069a149c 100644 --- a/src/gallium/auxiliary/translate/translate_cache.c +++ b/src/gallium/auxiliary/translate/translate_cache.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_state.h" #include "translate.h" #include "translate_cache.h" diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 4c8179ffa8..4d336f47ea 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -30,7 +30,7 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_state.h" #include "translate.h" diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 18a212ac1c..7955186e16 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -28,7 +28,7 @@ #include "pipe/p_config.h" #include "pipe/p_compiler.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_simple_list.h" #include "translate.h" diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 6eebf6d29b..6e5fd26c05 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -5,7 +5,6 @@ LIBNAME = util C_SOURCES = \ p_debug.c \ - p_tile.c \ u_blit.c \ u_draw_quad.c \ u_gen_mipmap.c \ @@ -16,6 +15,7 @@ C_SOURCES = \ u_rect.c \ u_simple_shaders.c \ u_snprintf.c \ + u_tile.c \ u_time.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 94382fe1f9..ce3fad7068 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,7 +6,6 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_debug_mem.c', 'p_debug_prof.c', - 'p_tile.c', 'u_blit.c', 'u_draw_quad.c', 'u_gen_mipmap.c', @@ -17,6 +16,7 @@ util = env.ConvenienceLibrary( 'u_rect.c', 'u_simple_shaders.c', 'u_snprintf.c', + 'u_tile.c', 'u_time.c', ]) diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 2c2f2f8931..7d1dba5a24 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -51,7 +51,6 @@ #endif #include "pipe/p_compiler.h" -#include "pipe/p_util.h" #include "pipe/p_debug.h" #include "pipe/p_format.h" #include "pipe/p_state.h" diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index ae087df4cf..05399f9885 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -37,12 +37,13 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" -#include "util/u_draw_quad.h" #include "util/u_blit.h" +#include "util/u_draw_quad.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "cso_cache/cso_context.h" diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 8713ff5d58..c1e2c19f87 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -37,10 +37,10 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" #include "util/u_draw_quad.h" #include "util/u_gen_mipmap.h" #include "util/u_simple_shaders.h" diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 2176a00959..2c40011923 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -35,9 +35,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "u_handle_table.h" +#include "util/u_memory.h" +#include "util/u_handle_table.h" #define HANDLE_TABLE_INITIAL_SIZE 16 diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c index dd5eca7fca..0bc8de9632 100644 --- a/src/gallium/auxiliary/util/u_hash_table.c +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -40,10 +40,11 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "cso_cache/cso_hash.h" -#include "u_hash_table.h" + +#include "util/u_memory.h" +#include "util/u_hash_table.h" struct hash_table diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index a541d30a5d..9b4ca39371 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -40,8 +40,6 @@ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" -#include "util/u_math.h" #ifdef __cplusplus @@ -49,6 +47,132 @@ extern "C" { #endif +#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +__inline double ceil(double val) +{ + double ceil_val; + + if((val - (long) val) == 0) { + ceil_val = val; + } + else { + if(val > 0) { + ceil_val = (long) val + 1; + } + else { + ceil_val = (long) val; + } + } + + return ceil_val; +} + +#ifndef PIPE_SUBSYSTEM_WINDOWS_CE +__inline double floor(double val) +{ + double floor_val; + + if((val - (long) val) == 0) { + floor_val = val; + } + else { + if(val > 0) { + floor_val = (long) val; + } + else { + floor_val = (long) val - 1; + } + } + + return floor_val; +} +#endif + +#pragma function(pow) +__inline double __cdecl pow(double val, double exponent) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(log) +__inline double __cdecl log(double val) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(atan2) +__inline double __cdecl atan2(double val) +{ + /* XXX */ + assert(0); + return 0; +} +#else +#include +#include +#endif + + +#if defined(_MSC_VER) +#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +static INLINE float cosf( float f ) +{ + return (float) cos( (double) f ); +} + +static INLINE float sinf( float f ) +{ + return (float) sin( (double) f ); +} + +static INLINE float ceilf( float f ) +{ + return (float) ceil( (double) f ); +} + +static INLINE float floorf( float f ) +{ + return (float) floor( (double) f ); +} + +static INLINE float powf( float f, float g ) +{ + return (float) pow( (double) f, (double) g ); +} + +static INLINE float sqrtf( float f ) +{ + return (float) sqrt( (double) f ); +} + +static INLINE float fabsf( float f ) +{ + return (float) fabs( (double) f ); +} + +static INLINE float logf( float f ) +{ + return (float) log( (double) f ); +} + +#else +/* Work-around an extra semi-colon in VS 2005 logf definition */ +#ifdef logf +#undef logf +#define logf(x) ((float)log((double)(x))) +#endif /* logf */ +#endif +#endif /* _MSC_VER */ + + + + + #define POW2_TABLE_SIZE 256 #define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1)) extern float pow2_table[POW2_TABLE_SIZE]; @@ -59,6 +183,11 @@ extern void util_init_math(void); +union fi { + float f; + int i; + unsigned ui; +}; /** @@ -195,6 +324,113 @@ util_iround(float f) +#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) +/** + * Find first bit set in word. Least significant bit is 1. + * Return 0 if no bits set. + */ +static INLINE +unsigned ffs( unsigned u ) +{ + unsigned i; + + if( u == 0 ) { + return 0; + } + + __asm bsf eax, [u] + __asm inc eax + __asm mov [i], eax + + return i; +} +#endif + + +/** + * Return float bits. + */ +static INLINE unsigned +fui( float f ) +{ + union fi fi; + fi.f = f; + return fi.ui; +} + + + +static INLINE float +ubyte_to_float(ubyte ub) +{ + return (float) ub * (1.0f / 255.0f); +} + + +/** + * Convert float in [0,1] to ubyte in [0,255] with clamping. + */ +static INLINE ubyte +float_to_ubyte(float f) +{ + const int ieee_0996 = 0x3f7f0000; /* 0.996 or so */ + union fi tmp; + + tmp.f = f; + if (tmp.i < 0) { + return (ubyte) 0; + } + else if (tmp.i >= ieee_0996) { + return (ubyte) 255; + } + else { + tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f; + return (ubyte) tmp.i; + } +} + + + +#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) + +#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) +#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) + + +static INLINE int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + + +#ifndef COPY_4V +#define COPY_4V( DST, SRC ) \ +do { \ + (DST)[0] = (SRC)[0]; \ + (DST)[1] = (SRC)[1]; \ + (DST)[2] = (SRC)[2]; \ + (DST)[3] = (SRC)[3]; \ +} while (0) +#endif + + +#ifndef COPY_4FV +#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC) +#endif + + +#ifndef ASSIGN_4V +#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \ +do { \ + (DST)[0] = (V0); \ + (DST)[1] = (V1); \ + (DST)[2] = (V2); \ + (DST)[3] = (V3); \ +} while (0) +#endif + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h new file mode 100644 index 0000000000..148a5cb997 --- /dev/null +++ b/src/gallium/auxiliary/util/u_memory.h @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Memory functions + */ + + +#ifndef U_MEMORY_H +#define U_MEMORY_H + + +#include "util/u_pointer.h" + + + /* Define ENOMEM for WINCE */ +#if (_WIN32_WCE < 600) +#ifndef ENOMEM +#define ENOMEM 12 +#endif +#endif + + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) + +/* memory debugging */ + +#include "p_debug.h" + +#define MALLOC( _size ) \ + debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) +#define CALLOC( _count, _size ) \ + debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) +#define FREE( _ptr ) \ + debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) +#define REALLOC( _ptr, _old_size, _size ) \ + debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void * __stdcall +EngAllocMem( + unsigned long Flags, + unsigned long MemSize, + unsigned long Tag ); + +void __stdcall +EngFreeMem( + void *Mem ); + +#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' ) +#define _FREE( _ptr ) EngFreeMem( _ptr ) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + +void * +ExAllocatePool( + unsigned long PoolType, + size_t NumberOfBytes); + +void +ExFreePool(void *P); + +#define MALLOC(_size) ExAllocatePool(0, _size) +#define _FREE(_ptr) ExFreePool(_ptr) + +#else + +#define MALLOC( SIZE ) malloc( SIZE ) +#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) +#define FREE( PTR ) free( PTR ) +#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) + +#endif + + +#ifndef CALLOC +static INLINE void * +CALLOC( unsigned count, unsigned size ) +{ + void *ptr = MALLOC( count * size ); + if( ptr ) { + memset( ptr, 0, count * size ); + } + return ptr; +} +#endif /* !CALLOC */ + +#ifndef FREE +static INLINE void +FREE( void *ptr ) +{ + if( ptr ) { + _FREE( ptr ); + } +} +#endif /* !FREE */ + +#ifndef REALLOC +static INLINE void * +REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) +{ + void *new_ptr = NULL; + + if (new_size != 0) { + unsigned copy_size = old_size < new_size ? old_size : new_size; + new_ptr = MALLOC( new_size ); + if (new_ptr && old_ptr && copy_size) { + memcpy( new_ptr, old_ptr, copy_size ); + } + } + + FREE( old_ptr ); + return new_ptr; +} +#endif /* !REALLOC */ + + +#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) + +#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) + + +/** + * Return memory on given byte alignment + */ +static INLINE void * +align_malloc(size_t bytes, uint alignment) +{ +#if defined(HAVE_POSIX_MEMALIGN) + void *mem; + alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1); + if(posix_memalign(& mem, alignment, bytes) != 0) + return NULL; + return mem; +#else + char *ptr, *buf; + + assert( alignment > 0 ); + + ptr = (char *) MALLOC(bytes + alignment + sizeof(void *)); + if (!ptr) + return NULL; + + buf = (char *) align_pointer( ptr + sizeof(void *), alignment ); + *(char **)(buf - sizeof(void *)) = ptr; + + return buf; +#endif /* defined(HAVE_POSIX_MEMALIGN) */ +} + +/** + * Free memory returned by align_malloc(). + */ +static INLINE void +align_free(void *ptr) +{ +#if defined(HAVE_POSIX_MEMALIGN) + FREE(ptr); +#else + void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); + void *realAddr = *cubbyHole; + FREE(realAddr); +#endif /* defined(HAVE_POSIX_MEMALIGN) */ +} + + +/** + * Duplicate a block of memory. + */ +static INLINE void * +mem_dup(const void *src, uint size) +{ + void *dup = MALLOC(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + +/** + * Number of elements in an array. + */ +#ifndef Elements +#define Elements(x) (sizeof(x)/sizeof((x)[0])) +#endif + + +/** + * Offset of a field in a struct, in bytes. + */ +#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) + + + +#endif /* U_MEMORY_H */ diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c index b49ae074e0..0f51dd5977 100644 --- a/src/gallium/auxiliary/util/u_mm.c +++ b/src/gallium/auxiliary/util/u_mm.c @@ -24,9 +24,9 @@ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "util/u_memory.h" #include "util/u_mm.h" diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 06abb34d5a..39e4ae9d07 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -37,6 +37,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" +#include "util/u_math.h" /** @@ -150,10 +151,10 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) if (pf_size_x(format) <= 8) { /* format uses 8-bit components or less */ - UNCLAMPED_FLOAT_TO_UBYTE(r, rgba[0]); - UNCLAMPED_FLOAT_TO_UBYTE(g, rgba[1]); - UNCLAMPED_FLOAT_TO_UBYTE(b, rgba[2]); - UNCLAMPED_FLOAT_TO_UBYTE(a, rgba[3]); + r = float_to_ubyte(rgba[0]); + g = float_to_ubyte(rgba[1]); + b = float_to_ubyte(rgba[2]); + a = float_to_ubyte(rgba[3]); } switch (format) { @@ -286,4 +287,31 @@ util_pack_z(enum pipe_format format, double z) } +/** + * Pack 4 ubytes into a 4-byte word + */ +static INLINE unsigned +pack_ub4(ubyte b0, ubyte b1, ubyte b2, ubyte b3) +{ + return ((((unsigned int)b0) << 0) | + (((unsigned int)b1) << 8) | + (((unsigned int)b2) << 16) | + (((unsigned int)b3) << 24)); +} + + +/** + * Pack/convert 4 floats into one 4-byte word. + */ +static INLINE unsigned +pack_ui32_float4(float a, float b, float c, float d) +{ + return pack_ub4( float_to_ubyte(a), + float_to_ubyte(b), + float_to_ubyte(c), + float_to_ubyte(d) ); +} + + + #endif /* U_PACK_COLOR_H */ diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h new file mode 100644 index 0000000000..e1af9f11cb --- /dev/null +++ b/src/gallium/auxiliary/util/u_pointer.h @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_POINTER_H +#define U_POINTER_H + +#include "pipe/p_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE intptr_t +pointer_to_intptr( const void *p ) +{ + union { + const void *p; + intptr_t i; + } pi; + pi.p = p; + return pi.i; +} + +static INLINE void * +intptr_to_pointer( intptr_t i ) +{ + union { + void *p; + intptr_t i; + } pi; + pi.i = i; + return pi.p; +} + +static INLINE uintptr_t +pointer_to_uintptr( const void *ptr ) +{ + union { + const void *p; + uintptr_t u; + } pu; + pu.p = ptr; + return pu.u; +} + +static INLINE void * +uintptr_to_pointer( uintptr_t u ) +{ + union { + void *p; + uintptr_t u; + } pu; + pu.u = u; + return pu.p; +} + +/** + * Return a pointer aligned to next multiple of N bytes. + */ +static INLINE void * +align_pointer( const void *unaligned, uintptr_t alignment ) +{ + uintptr_t aligned = (pointer_to_uintptr( unaligned ) + alignment - 1) & ~(alignment - 1); + return uintptr_to_pointer( aligned ); +} + + +/** + * Return a pointer aligned to next multiple of 16 bytes. + */ +static INLINE void * +align16( void *unaligned ) +{ + return align_pointer( unaligned, 16 ); +} + + + +#ifdef __cplusplus +} +#endif + +#endif /* U_POINTER_H */ diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 94e447b9d5..b31ab5415f 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -31,7 +31,6 @@ #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_format.h" #include "util/u_rect.h" diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index c34fb6ee33..f06d13c2c4 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -37,10 +37,10 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "tgsi/tgsi_build.h" diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c new file mode 100644 index 0000000000..853c503f4f --- /dev/null +++ b/src/gallium/auxiliary/util/u_tile.c @@ -0,0 +1,1169 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * RGBA/float tile get/put functions. + * Usable both by drivers and state trackers. + * Surfaces should already be in a mapped state. + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_rect.h" +#include "util/u_tile.h" + + +/** + * Move raw block of pixels from surface to user memory. + * This should be usable by any hw driver that has mappable surfaces. + */ +void +pipe_get_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *dst, int dst_stride) +{ + const void *src; + + if (dst_stride == 0) + dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + assert(src); + if(!src) + return; + + pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); + + pipe_surface_unmap(ps); +} + + +/** + * Move raw block of pixels from user memory to surface. + * This should be usable by any hw driver that has mappable surfaces. + */ +void +pipe_put_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *src, int src_stride) +{ + void *dst; + + if (src_stride == 0) + src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(dst); + if(!dst) + return; + + pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); + + pipe_surface_unmap(ps); +} + + + + +/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */ +#define SHORT_TO_FLOAT(S) ((2.0F * (S) + 1.0F) * (1.0F/65535.0F)) + +#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ + us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) + + + +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +a8r8g8b8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 24) & 0xff); + } + p += dst_stride; + } +} + + +static void +a8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (a << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +x8r8g8b8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); + pRow[3] = ubyte_to_float(0xff); + } + p += dst_stride; + } +} + + +static void +x8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ + +static void +b8g8r8a8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 24) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 0) & 0xff); + } + p += dst_stride; + } +} + + +static void +b8g8r8a8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (b << 24) | (g << 16) | (r << 8) | a; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ + +static void +a1r5g5b5_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = ((pixel >> 15) ) * 1.0f; + } + p += dst_stride; + } +} + + +static void +a1r5g5b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + r = r >> 3; /* 5 bits */ + g = g >> 3; /* 5 bits */ + b = b >> 3; /* 5 bits */ + a = a >> 7; /* 1 bit */ + *dst++ = (a << 15) | (r << 10) | (g << 5) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/ + +static void +a4r4g4b4_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f); + pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f); + pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f); + pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f); + } + p += dst_stride; + } +} + + +static void +a4r4g4b4_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + r >>= 4; + g >>= 4; + b >>= 4; + a >>= 4; + *dst++ = (a << 12) | (r << 16) | (g << 4) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_R5G6B5_UNORM ***/ + +static void +r5g6b5_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = 1.0f; + } + p += dst_stride; + } +} + + +static void +r5g6b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0); + uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0); + uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0); + *dst++ = (r << 11) | (g << 5) | (b); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_Z16_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z16_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const float scale = 1.0f / 65535.0f; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src++ * scale; + } + p += dst_stride; + } +} + + + + +/*** PIPE_FORMAT_L8_UNORM ***/ + +static void +l8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = ubyte_to_float(*src); + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +static void +l8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + r = float_to_ubyte(pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_A8_UNORM ***/ + +static void +a8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = 0.0; + pRow[3] = ubyte_to_float(*src); + } + p += dst_stride; + } +} + + +static void +a8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned a; + a = float_to_ubyte(pRow[3]); + *dst++ = a; + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_R16_SNORM ***/ + +static void +r16_get_tile_rgba(const short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = + pRow[2] = 0.0; + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +static void +r16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst++, pRow += 4) { + UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ + +static void +r16g16b16a16_get_tile_rgba(const short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src += 4, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = SHORT_TO_FLOAT(src[1]); + pRow[2] = SHORT_TO_FLOAT(src[2]); + pRow[3] = SHORT_TO_FLOAT(src[3]); + } + p += dst_stride; + } +} + + +static void +r16g16b16a16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst += 4, pRow += 4) { + UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]); + UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]); + UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_I8_UNORM ***/ + +static void +i8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = ubyte_to_float(*src); + } + p += dst_stride; + } +} + + +static void +i8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + r = float_to_ubyte(pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A8L8_UNORM ***/ + +static void +a8l8_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + ushort p = *src++; + pRow[0] = + pRow[1] = + pRow[2] = ubyte_to_float(p & 0xff); + pRow[3] = ubyte_to_float(p >> 8); + } + p += dst_stride; + } +} + + +static void +a8l8_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, a; + r = float_to_ubyte(pRow[0]); + a = float_to_ubyte(pRow[3]); + *dst++ = (a << 8) | r; + } + p += src_stride; + } +} + + + + +/*** PIPE_FORMAT_Z32_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z32_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / (double) 0xffffffff; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (*src++ * scale); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_S8Z24_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +s8z24_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ & 0xffffff)); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_Z24S8_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +z24s8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ >> 8)); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ + +/** + * Convert YCbCr (or YCrCb) to RGBA. + */ +static void +ycbcr_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride, + boolean rev) +{ + const float scale = 1.0f / 255.0f; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + /* do two texels at a time */ + for (j = 0; j < (w & ~1); j += 2, src += 2) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + float r, g, b; + + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + + /* even pixel: y0,cr,cb */ + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + /* odd pixel: use y1,cr,cb */ + r = 1.164f * (y1-16) + 1.596f * (cr-128); + g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y1-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + } + /* do the last texel */ + if (w & 1) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + float r, g, b; + + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + + /* even pixel: y0,cr,cb */ + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + } + p += dst_stride; + } +} + + +void +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint h, + float *dst, unsigned dst_stride) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_L8_UNORM: + l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A8_UNORM: + a8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_I8_UNORM: + i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A8L8_UNORM: + a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R16_SNORM: + r16_get_tile_rgba((short *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + z16_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z32_UNORM: + z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z24S8_UNORM: + z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_YCBCR: + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE); + break; + case PIPE_FORMAT_YCBCR_REV: + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); + break; + default: + assert(0); + } +} + + +void +pipe_get_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p) +{ + unsigned dst_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + + if (!packed) + return; + + if(ps->format == PIPE_FORMAT_YCBCR || ps->format == PIPE_FORMAT_YCBCR_REV) + assert((x & 1) == 0); + + pipe_get_tile_raw(ps, x, y, w, h, packed, 0); + + pipe_tile_raw_to_rgba(ps->format, packed, w, h, p, dst_stride); + + FREE(packed); +} + + +void +pipe_put_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p) +{ + unsigned src_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + + if (!packed) + return; + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + assert(0); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_L8_UNORM: + l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A8_UNORM: + a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_I8_UNORM: + i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A8L8_UNORM: + a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R16_SNORM: + r16_put_tile_rgba((short *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z32_UNORM: + /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z24S8_UNORM: + /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + default: + assert(0); + } + + pipe_put_tile_raw(ps, x, y, w, h, packed, 0); + + FREE(packed); +} + + +/** + * Get a block of Z values, converted to 32-bit range. + */ +void +pipe_get_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z) +{ + const uint dstStride = w; + ubyte *map; + uint *pDest = z; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + if (!map) { + assert(0); + return; + } + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + const uint *pSrc + = (const uint *)(map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += dstStride; + pSrc += ps->stride/4; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + { + const uint *pSrc + = (const uint *)(map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 24-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); + } + pDest += dstStride; + pSrc += ps->stride/4; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + const ushort *pSrc + = (const ushort *)(map + y * ps->stride + x*2); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 16-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 16) | pSrc[j]; + } + pDest += dstStride; + pSrc += ps->stride/2; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + +void +pipe_put_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *zSrc) +{ + const uint srcStride = w; + const uint *pSrc = zSrc; + ubyte *map; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + if (!map) { + assert(0); + return; + } + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + uint *pDest = (uint *) (map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += ps->stride/4; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + { + uint *pDest = (uint *) (map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 24-bit Z (0 stencil) */ + pDest[j] = pSrc[j] >> 8; + } + pDest += ps->stride/4; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + ushort *pDest = (ushort *) (map + y * ps->stride + x*2); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 16-bit Z */ + pDest[j] = pSrc[j] >> 16; + } + pDest += ps->stride/2; + pSrc += srcStride; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h new file mode 100644 index 0000000000..a8ac805308 --- /dev/null +++ b/src/gallium/auxiliary/util/u_tile.h @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef P_TILE_H +#define P_TILE_H + +#include "pipe/p_compiler.h" + +struct pipe_surface; + + +/** + * Clip tile against surface dims. + * \return TRUE if tile is totally clipped, FALSE otherwise + */ +static INLINE boolean +pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) +{ + if (x >= ps->width) + return TRUE; + if (y >= ps->height) + return TRUE; + if (x + *w > ps->width) + *w = ps->width - x; + if (y + *h > ps->height) + *h = ps->height - y; + return FALSE; +} + +#ifdef __cplusplus +extern "C" { +#endif + +void +pipe_get_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *p, int dst_stride); + +void +pipe_put_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *p, int src_stride); + + +void +pipe_get_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p); + +void +pipe_put_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p); + + +void +pipe_get_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z); + +void +pipe_put_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *z); + +void +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint h, + float *dst, unsigned dst_stride); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index f430e88b9c..6bace0bb11 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -34,7 +34,6 @@ #define CELL_COMMON_H #include "pipe/p_compiler.h" -#include "pipe/p_util.h" #include "pipe/p_format.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 3ffe09add6..cee0917b63 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -34,7 +34,7 @@ #include #include #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 12eb5aa254..5af95a3c10 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -35,7 +35,7 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 67b87f16d7..971d65d09e 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -30,7 +30,7 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "draw/draw_context.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index b663b37622..dd25ae880e 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -33,7 +33,7 @@ #include "cell_context.h" #include "cell_render.h" #include "cell_spu.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_private.h" diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 2bf441a0c5..139b3719b6 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 5480534ad9..8ab938a02a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 9cae67f091..3646a0ee4f 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cell_context.h" #include "cell_state.h" #include "cell_state_emit.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index f5707f2bb8..cd96b317fa 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 01ffa31c2c..2d31ad89a6 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 533b64227d..1add81373d 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -33,7 +33,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/ppu/cell_winsys.c index ebabce3c8f..d570bbd2f9 100644 --- a/src/gallium/drivers/cell/ppu/cell_winsys.c +++ b/src/gallium/drivers/cell/ppu/cell_winsys.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cell_winsys.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 42e5022f30..89c61136a4 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -63,7 +63,6 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index ab4ff8160a..8944ef171e 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -32,7 +32,6 @@ #include #include "pipe/p_compiler.h" #include "pipe/p_format.h" -#include "pipe/p_util.h" #include "spu_colorpack.h" #include "spu_main.h" #include "spu_texture.h" diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index 74ab2bbd1f..dbcf4b0eb9 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,4 +1,3 @@ -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" //#include "tgsi_build.h" diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 219fd90cc0..26f2363749 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -32,7 +32,6 @@ * Ian Romanick */ -#include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" #include "spu_exec.h" diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index 3119a78c06..a1e81975e6 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -34,7 +34,6 @@ #include -#include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" #include "spu_vertex_shader.h" diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 014a3e31d5..10c4ffc209 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -28,7 +28,7 @@ #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_context.h" #include "fo_context.h" diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index e2bf5ab678..c6776716a2 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -35,7 +35,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c index c024a051a5..48be3e1472 100644 --- a/src/gallium/drivers/i915simple/i915_debug_fp.c +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -29,7 +29,7 @@ #include "i915_reg.h" #include "i915_debug.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" static void diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h index 80a9576304..2f0f99d046 100644 --- a/src/gallium/drivers/i915simple/i915_fpc.h +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -29,7 +29,6 @@ #ifndef I915_FPC_H #define I915_FPC_H -#include "pipe/p_util.h" #include "i915_context.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 64432982c4..34b4a846c1 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,6 +33,8 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_string.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index 9ffa460138..d194c2fb15 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -27,7 +27,9 @@ #include "draw/draw_pipe.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" #include "i915_context.h" #include "i915_winsys.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index aef3682bbf..e4ece55098 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -41,9 +41,10 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "i915_context.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 0afa17bed8..e9e40c3f0b 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index e8521b385e..d2487d8277 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -31,8 +31,9 @@ #include "draw/draw_context.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 4daccec6e0..488615067c 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c index 8cfbdddd19..86126a5a15 100644 --- a/src/gallium/drivers/i915simple/i915_state_dynamic.c +++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c @@ -30,7 +30,9 @@ #include "i915_context.h" #include "i915_reg.h" #include "i915_state.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" #define FILE_DEBUG_FLAG DEBUG_STATE diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c index 2501f2d7cb..8c16bb4e27 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -33,7 +33,7 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 7868f21ca6..c09c10601b 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -27,7 +27,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "i915_state_inlines.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 17b5125e56..62f1926644 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -30,10 +30,9 @@ #include "i915_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_rect.h" diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index ca0fb8761b..32344da4d5 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -34,8 +34,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "i915_context.h" #include "i915_texture.h" diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c index 337e4f95f6..79d4150383 100644 --- a/src/gallium/drivers/i965simple/brw_cc.c +++ b/src/gallium/drivers/i965simple/brw_cc.c @@ -29,7 +29,8 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "brw_context.h" #include "brw_state.h" @@ -232,8 +233,7 @@ static void upload_cc_unit( struct brw_context *brw ) cc.cc3.alpha_test_func = brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], - brw->attribs.DepthStencil->alpha.ref); + cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref); cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; } diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c index ea5c05a279..8e78dd51be 100644 --- a/src/gallium/drivers/i965simple/brw_clip_state.c +++ b/src/gallium/drivers/i965simple/brw_clip_state.c @@ -32,7 +32,8 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" static void upload_clip_unit( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 8326f7b9c4..96920df008 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -39,7 +39,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c index 52bbd525c1..824ee7fd6d 100644 --- a/src/gallium/drivers/i965simple/brw_curbe.c +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -39,7 +39,8 @@ #include "brw_wm.h" #include "pipe/p_state.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #define FILE_DEBUG_FLAG DEBUG_FALLBACKS diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c index 9c0c78c236..7c20ea52af 100644 --- a/src/gallium/drivers/i965simple/brw_draw_upload.c +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -33,6 +33,7 @@ #include "brw_context.h" #include "brw_state.h" + struct brw_array_state { union header_union header; diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c index 3932e9e939..5b8016b2e9 100644 --- a/src/gallium/drivers/i965simple/brw_gs_state.c +++ b/src/gallium/drivers/i965simple/brw_gs_state.c @@ -34,7 +34,8 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index fadfbf94ab..ab7cd624b2 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c index 9acd3ea61b..2a5de61c21 100644 --- a/src/gallium/drivers/i965simple/brw_sf_state.c +++ b/src/gallium/drivers/i965simple/brw_sf_state.c @@ -30,11 +30,12 @@ */ - #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + static void upload_sf_vp(struct brw_context *brw) { diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index 30f37a99d4..86d877d7ef 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -1,7 +1,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 27ca32843d..af46cb546f 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -31,7 +31,7 @@ #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c index 35db76b594..43a1c89fc4 100644 --- a/src/gallium/drivers/i965simple/brw_state_batch.c +++ b/src/gallium/drivers/i965simple/brw_state_batch.c @@ -32,7 +32,7 @@ #include "brw_state.h" #include "brw_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /* A facility similar to the data caching code above, which aims to * prevent identical commands being issued repeatedly. diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c index b3a5124461..094248fa69 100644 --- a/src/gallium/drivers/i965simple/brw_state_cache.c +++ b/src/gallium/drivers/i965simple/brw_state_cache.c @@ -38,7 +38,7 @@ #include "brw_sf.h" #include "brw_gs.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c index f3174bfe0a..78d4c0e411 100644 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -43,7 +43,8 @@ */ #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "brw_context.h" #include "brw_state.h" diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c index e727601e1e..bac9161b5f 100644 --- a/src/gallium/drivers/i965simple/brw_state_upload.c +++ b/src/gallium/drivers/i965simple/brw_state_upload.c @@ -33,7 +33,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 69da252285..b89756c47b 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -29,10 +29,9 @@ #include "brw_context.h" #include "brw_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_rect.h" diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 9b6cf81723..05eda9d1f2 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -33,16 +33,16 @@ /* Code to layout images in a mipmap tree for i965. */ -#include "brw_tex_layout.h" - #include "pipe/p_state.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" - +#include "util/u_math.h" +#include "util/u_memory.h" #include "brw_context.h" +#include "brw_tex_layout.h" + #define FILE_DEBUG_FLAG DEBUG_TEXTURE diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c index c73469929c..1eaff87892 100644 --- a/src/gallium/drivers/i965simple/brw_vs_state.c +++ b/src/gallium/drivers/i965simple/brw_vs_state.c @@ -34,7 +34,8 @@ #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" static void upload_vs_unit( struct brw_context *brw ) { diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c index 7fc5f59a98..8de565b96c 100644 --- a/src/gallium/drivers/i965simple/brw_wm.c +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -35,7 +35,7 @@ #include "brw_wm.h" #include "brw_eu.h" #include "brw_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index e6f1a44817..d50e66f613 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -2,7 +2,8 @@ #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index 6a4a5aef09..ab6410aa60 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -2,7 +2,8 @@ #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c index b9eaee56ee..52b2909a65 100644 --- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -34,7 +34,8 @@ #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #define COMPAREFUNC_ALWAYS 0 diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c index f3aa36b07f..37a9bf919c 100644 --- a/src/gallium/drivers/i965simple/brw_wm_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -34,7 +34,8 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_wm.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" /*********************************************************************** * WM unit - fragment programs and rasterization diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 9b1313bc83..dda90f760a 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -32,8 +32,8 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index cc171bbc39..d0456731be 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -34,7 +34,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 20226da78c..34adac5226 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -36,7 +36,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_sse2.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 8b7da7c747..35653a8e48 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -34,7 +34,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_sse2.h" diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 941ab62e00..038ff04d4f 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -41,7 +41,7 @@ #include "sp_prim_setup.h" #include "draw/draw_pipe.h" #include "draw/draw_vertex.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /** * Triangle setup info (derived from draw_stage). diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index e9fae951e0..425e13cd28 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -43,6 +43,7 @@ #include "sp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "util/u_memory.h" #define SP_MAX_VBUF_INDEXES 1024 diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 7a42b08ef5..7d3580fb4f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -7,7 +7,7 @@ #include "sp_headers.h" #include "sp_quad.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" static void diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 74c6bff84a..a834accb86 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -31,7 +31,8 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" @@ -128,15 +129,15 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) /* convert to ubyte */ for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */ - - UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */ + dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ + dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ + dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ + dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + + src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ + src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ + src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ + src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ } switch (softpipe->blend->logicop_func) { @@ -209,10 +210,10 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) } for (j = 0; j < 4; j++) { - quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]); - quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]); - quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]); - quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]); + quadColor[j][0] = ubyte_to_float(res[j][0]); + quadColor[j][1] = ubyte_to_float(res[j][1]); + quadColor[j][2] = ubyte_to_float(res[j][2]); + quadColor[j][3] = ubyte_to_float(res[j][3]); } } diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c index b3db428ef1..92e9af09c1 100644 --- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -1,5 +1,5 @@ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index 7fe080990b..f72f31db97 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -31,7 +31,8 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index dd5ebb2296..ad907ec25f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 0c82692c6e..227cb2014e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -30,7 +30,7 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c index 22ea99049f..5a66a86699 100644 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -30,7 +30,7 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_headers.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 8c88c192f8..5499ba5361 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -35,7 +35,8 @@ * all the enabled attributes run contiguously. */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c index 54254df1f1..db13e73ae3 100644 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index 40083138a4..b64646a449 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index b4c7e942fa..ce9562e07c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -10,7 +10,7 @@ #include "sp_tile_cache.h" #include "sp_quad.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /** Only 8-bit stencil supported */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index f1e9b80e09..a39ecc2e9d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -7,7 +7,7 @@ #include "sp_headers.h" #include "sp_quad.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /** diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index adf9ccf64c..2106ee1d23 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -32,7 +32,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_query.h" diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index f6b3d7ac24..9644dbd168 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index c8c55fa6e8..87336ab6e3 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -42,9 +42,9 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vertex.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#include "util/u_memory.h" #define DEBUG_VERTS 0 diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index 2d40d6bd8f..384fe559af 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -28,7 +28,7 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f10a1fa471..6b6a4c3ff3 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -25,7 +25,8 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 76fe6bfef9..1be461b3a4 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -30,7 +30,7 @@ #include "sp_fs.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 98e04352db..87b7219683 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 033288a0aa..99a28c0d7e 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -29,7 +29,7 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index bfbae234f1..389aceb27c 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -26,10 +26,9 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_rect.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 58a95d13e1..49250ec084 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -39,9 +39,9 @@ #include "sp_tile_cache.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "tgsi/tgsi_exec.h" #include "util/u_math.h" +#include "util/u_memory.h" /* diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index f775591352..3a737d6f72 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -33,8 +33,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 57c12ffe33..b50c984513 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -32,9 +32,9 @@ * Brian Paul */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_memory.h" +#include "util/u_tile.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_texture.h" diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index f16359e8ad..1dd7719379 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_screen.h" #include "tr_dump.h" diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 1613a626df..48032c1617 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -45,6 +45,8 @@ #endif #include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "util/u_memory.h" #include "util/u_string.h" #include "tr_stream.h" diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 6ddc8fc15c..76a53731b3 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -35,7 +35,6 @@ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" boolean trace_dump_trace_begin(void); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index a6467ec35f..8789f86b1a 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "tr_dump.h" #include "tr_state.h" diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 30ab5a8fdc..986d939e0c 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -27,6 +27,7 @@ #include "pipe/p_compiler.h" +#include "util/u_memory.h" #include "tgsi/tgsi_dump.h" #include "tr_dump.h" diff --git a/src/gallium/drivers/trace/tr_stream_stdc.c b/src/gallium/drivers/trace/tr_stream_stdc.c index 4c77e1c995..4c19ec0b24 100644 --- a/src/gallium/drivers/trace/tr_stream_stdc.c +++ b/src/gallium/drivers/trace/tr_stream_stdc.c @@ -36,7 +36,7 @@ #include -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "tr_stream.h" diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c index b3b65f0971..704eb15bd7 100644 --- a/src/gallium/drivers/trace/tr_stream_wd.c +++ b/src/gallium/drivers/trace/tr_stream_wd.c @@ -37,7 +37,7 @@ #include #include -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_string.h" #include "tr_stream.h" diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 99ba74d366..440a78704a 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -25,9 +25,9 @@ * **************************************************************************/ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "util/u_hash_table.h" +#include "util/u_memory.h" #include "tr_screen.h" #include "tr_texture.h" diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 2c7a6f893b..177835854e 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -25,8 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" -#include "pipe/p_state.h" +#include "util/u_memory.h" #include "util/u_hash_table.h" #include "tr_dump.h" diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h deleted file mode 100644 index 4a3fca5962..0000000000 --- a/src/gallium/include/pipe/p_util.h +++ /dev/null @@ -1,460 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef P_UTIL_H -#define P_UTIL_H - -#include "p_config.h" -#include "p_compiler.h" -#include "p_debug.h" -#include "p_pointer.h" - -#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) -__inline double ceil(double val) -{ - double ceil_val; - - if((val - (long) val) == 0) { - ceil_val = val; - } else { - if(val > 0) { - ceil_val = (long) val + 1; - } else { - ceil_val = (long) val; - } - } - - return ceil_val; -} - -#ifndef PIPE_SUBSYSTEM_WINDOWS_CE -__inline double floor(double val) -{ - double floor_val; - - if((val - (long) val) == 0) { - floor_val = val; - } else { - if(val > 0) { - floor_val = (long) val; - } else { - floor_val = (long) val - 1; - } - } - - return floor_val; -} -#endif - -#pragma function(pow) -__inline double __cdecl pow(double val, double exponent) -{ - /* XXX */ - assert(0); - return 0; -} - -#pragma function(log) -__inline double __cdecl log(double val) -{ - /* XXX */ - assert(0); - return 0; -} - -#pragma function(atan2) -__inline double __cdecl atan2(double val) -{ - /* XXX */ - assert(0); - return 0; -} -#else -#include -#include -#endif - - /* Define ENOMEM for WINCE */ -#if (_WIN32_WCE < 600) -#ifndef ENOMEM -#define ENOMEM 12 -#endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) - -/* memory debugging */ - -#include "p_debug.h" - -#define MALLOC( _size ) \ - debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) -#define CALLOC( _count, _size ) \ - debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) -#define FREE( _ptr ) \ - debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) -#define REALLOC( _ptr, _old_size, _size ) \ - debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -void * __stdcall -EngAllocMem( - unsigned long Flags, - unsigned long MemSize, - unsigned long Tag ); - -void __stdcall -EngFreeMem( - void *Mem ); - -#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' ) -#define _FREE( _ptr ) EngFreeMem( _ptr ) - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - -void * -ExAllocatePool( - unsigned long PoolType, - size_t NumberOfBytes); - -void -ExFreePool(void *P); - -#define MALLOC(_size) ExAllocatePool(0, _size) -#define _FREE(_ptr) ExFreePool(_ptr) - -#else - -#define MALLOC( SIZE ) malloc( SIZE ) -#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) -#define FREE( PTR ) free( PTR ) -#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) - -#endif - - -#ifndef CALLOC -static INLINE void * -CALLOC( unsigned count, unsigned size ) -{ - void *ptr = MALLOC( count * size ); - if( ptr ) { - memset( ptr, 0, count * size ); - } - return ptr; -} -#endif /* !CALLOC */ - -#ifndef FREE -static INLINE void -FREE( void *ptr ) -{ - if( ptr ) { - _FREE( ptr ); - } -} -#endif /* !FREE */ - -#ifndef REALLOC -static INLINE void * -REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) -{ - void *new_ptr = NULL; - - if (new_size != 0) { - unsigned copy_size = old_size < new_size ? old_size : new_size; - new_ptr = MALLOC( new_size ); - if (new_ptr && old_ptr && copy_size) { - memcpy( new_ptr, old_ptr, copy_size ); - } - } - - FREE( old_ptr ); - return new_ptr; -} -#endif /* !REALLOC */ - - -#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) - -#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) - - -/** - * Return memory on given byte alignment - */ -static INLINE void * -align_malloc(size_t bytes, uint alignment) -{ -#if defined(HAVE_POSIX_MEMALIGN) - void *mem; - alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1); - if(posix_memalign(& mem, alignment, bytes) != 0) - return NULL; - return mem; -#else - char *ptr, *buf; - - assert( alignment > 0 ); - - ptr = (char *) MALLOC(bytes + alignment + sizeof(void *)); - if (!ptr) - return NULL; - - buf = (char *) align_pointer( ptr + sizeof(void *), alignment ); - *(char **)(buf - sizeof(void *)) = ptr; - - return buf; -#endif /* defined(HAVE_POSIX_MEMALIGN) */ -} - -/** - * Free memory returned by align_malloc(). - */ -static INLINE void -align_free(void *ptr) -{ -#if defined(HAVE_POSIX_MEMALIGN) - FREE(ptr); -#else - void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); - void *realAddr = *cubbyHole; - FREE(realAddr); -#endif /* defined(HAVE_POSIX_MEMALIGN) */ -} - - - -/** - * Duplicate a block of memory. - */ -static INLINE void * -mem_dup(const void *src, uint size) -{ - void *dup = MALLOC(size); - if (dup) - memcpy(dup, src, size); - return dup; -} - - - -#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) -#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) -#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) - -#ifndef Elements -#define Elements(x) (sizeof(x)/sizeof((x)[0])) -#endif -#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) - -/** - * Return a pointer aligned to next multiple of 16 bytes. - */ -static INLINE void * -align16( void *unaligned ) -{ - return align_pointer( unaligned, 16 ); -} - - -static INLINE int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} - - - - -#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) -static INLINE unsigned ffs( unsigned u ) -{ - unsigned i; - - if( u == 0 ) { - return 0; - } - - __asm bsf eax, [u] - __asm inc eax - __asm mov [i], eax - - return i; -} -#endif - -union fi { - float f; - int i; - unsigned ui; -}; - -#define UBYTE_TO_FLOAT( ub ) ((float)(ub) / 255.0F) - -#define IEEE_0996 0x3f7f0000 /* 0.996 or so */ - -/* This function/macro is sensitive to precision. Test very carefully - * if you change it! - */ -#define UNCLAMPED_FLOAT_TO_UBYTE(UB, F) \ - do { \ - union fi __tmp; \ - __tmp.f = (F); \ - if (__tmp.i < 0) \ - UB = (ubyte) 0; \ - else if (__tmp.i >= IEEE_0996) \ - UB = (ubyte) 255; \ - else { \ - __tmp.f = __tmp.f * (255.0f/256.0f) + 32768.0f; \ - UB = (ubyte) __tmp.i; \ - } \ - } while (0) - - - -static INLINE unsigned pack_ub4( unsigned char b0, - unsigned char b1, - unsigned char b2, - unsigned char b3 ) -{ - return ((((unsigned int)b0) << 0) | - (((unsigned int)b1) << 8) | - (((unsigned int)b2) << 16) | - (((unsigned int)b3) << 24)); -} - -static INLINE unsigned fui( float f ) -{ - union fi fi; - fi.f = f; - return fi.ui; -} - -static INLINE unsigned char float_to_ubyte( float f ) -{ - unsigned char ub; - UNCLAMPED_FLOAT_TO_UBYTE(ub, f); - return ub; -} - -static INLINE unsigned pack_ui32_float4( float a, - float b, - float c, - float d ) -{ - return pack_ub4( float_to_ubyte(a), - float_to_ubyte(b), - float_to_ubyte(c), - float_to_ubyte(d) ); -} - -#define COPY_4V( DST, SRC ) \ -do { \ - (DST)[0] = (SRC)[0]; \ - (DST)[1] = (SRC)[1]; \ - (DST)[2] = (SRC)[2]; \ - (DST)[3] = (SRC)[3]; \ -} while (0) - - -#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC) - - -#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \ -do { \ - (DST)[0] = (V0); \ - (DST)[1] = (V1); \ - (DST)[2] = (V2); \ - (DST)[3] = (V3); \ -} while (0) - - - -#if defined(_MSC_VER) -#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - -static INLINE float cosf( float f ) -{ - return (float) cos( (double) f ); -} - -static INLINE float sinf( float f ) -{ - return (float) sin( (double) f ); -} - -static INLINE float ceilf( float f ) -{ - return (float) ceil( (double) f ); -} - -static INLINE float floorf( float f ) -{ - return (float) floor( (double) f ); -} - -static INLINE float powf( float f, float g ) -{ - return (float) pow( (double) f, (double) g ); -} - -static INLINE float sqrtf( float f ) -{ - return (float) sqrt( (double) f ); -} - -static INLINE float fabsf( float f ) -{ - return (float) fabs( (double) f ); -} - -static INLINE float logf( float f ) -{ - return (float) log( (double) f ); -} - -#else -/* Work-around an extra semi-colon in VS 2005 logf definition */ -#ifdef logf -#undef logf -#define logf(x) ((float)log((double)(x))) -#endif /* logf */ -#endif -#endif /* _MSC_VER */ - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 641b19e940..a67372c623 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -42,7 +42,7 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index a1889539dc..f71d85dd9b 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -26,12 +26,13 @@ **************************************************************************/ -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_inlines.h" #include "cso_cache/cso_context.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "trace/tr_screen.h" #include "trace/tr_context.h" diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c index b47c7be293..7765df3c4a 100644 --- a/src/gallium/state_trackers/python/st_sample.c +++ b/src/gallium/state_trackers/python/st_sample.c @@ -29,9 +29,10 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "st_sample.h" diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index 6ea3c9a5cf..2d4f5434b3 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -39,8 +39,9 @@ #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "st_winsys.h" diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.c b/src/gallium/winsys/drm/intel/common/intel_be_device.c index 8db0329615..019ee5cbd2 100644 --- a/src/gallium/winsys/drm/intel/common/intel_be_device.c +++ b/src/gallium/winsys/drm/intel/common/intel_be_device.c @@ -13,8 +13,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_memory.h" #include "i915simple/i915_screen.h" diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c index 0d98d16cf1..20920a2052 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c +++ b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c @@ -32,8 +32,8 @@ #include "intel_context.h" #include "intel_winsys_softpipe.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_format.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 829732eea8..e9f821d276 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -38,8 +38,8 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "eglconfig.h" diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index f4199e6f89..ae81d7f801 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -37,8 +37,9 @@ #include "pipe/p_winsys.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sw_winsys.h" diff --git a/src/gallium/winsys/gdi/wmesa.c b/src/gallium/winsys/gdi/wmesa.c index ff52ceb8c4..730fb1b541 100644 --- a/src/gallium/winsys/gdi/wmesa.c +++ b/src/gallium/winsys/gdi/wmesa.c @@ -12,8 +12,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "glapi/glapi.h" #include "colors.h" diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c index 6e814ce5d1..f319802962 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -34,7 +34,6 @@ #include "brw_aub.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_debug.h" diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 4b4dc56e84..68ead7f528 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -42,8 +42,9 @@ #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #ifdef GALLIUM_CELL diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index 7fc9debdd5..3439367636 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -37,7 +37,7 @@ #include "xmesaP.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "i965simple/brw_winsys.h" #include "i965simple/brw_screen.h" diff --git a/src/mesa/state_tracker/acc2.c b/src/mesa/state_tracker/acc2.c new file mode 100644 index 0000000000..fa5de2b764 --- /dev/null +++ b/src/mesa/state_tracker/acc2.c @@ -0,0 +1,319 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" + +#include "st_context.h" +#include "st_cb_accum.h" +#include "st_cb_fbo.h" +#include "st_draw.h" +#include "st_format.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/p_tile.h" + + +#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ + us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) + + +/** + * For hardware that supports deep color buffers, we could accelerate + * most/all the accum operations with blending/texturing. + * For now, just use the get/put_tile() functions and do things in software. + */ + + +static void +acc_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, + uint x, uint y, uint w, uint h, float *p) +{ + const enum pipe_format f = acc_ps->format; + const int cpp = acc_ps->cpp; + + acc_ps->format = PIPE_FORMAT_R16G16B16A16_SNORM; + acc_ps->cpp = 8; + + pipe_get_tile_rgba(pipe, acc_ps, x, y, w, h, p); + + acc_ps->format = f; + acc_ps->cpp = cpp; +} + + +static void +acc_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, + uint x, uint y, uint w, uint h, const float *p) +{ + enum pipe_format f = acc_ps->format; + const int cpp = acc_ps->cpp; + + acc_ps->format = PIPE_FORMAT_R16G16B16A16_SNORM; + acc_ps->cpp = 8; + + pipe_put_tile_rgba(pipe, acc_ps, x, y, w, h, p); + + acc_ps->format = f; + acc_ps->cpp = cpp; +} + + + +void +st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct st_renderbuffer *acc_strb = st_renderbuffer(rb); + struct pipe_surface *acc_ps = acc_strb->surface; + const GLint xpos = ctx->DrawBuffer->_Xmin; + const GLint ypos = ctx->DrawBuffer->_Ymin; + const GLint width = ctx->DrawBuffer->_Xmax - xpos; + const GLint height = ctx->DrawBuffer->_Ymax - ypos; + const GLfloat r = ctx->Accum.ClearColor[0]; + const GLfloat g = ctx->Accum.ClearColor[1]; + const GLfloat b = ctx->Accum.ClearColor[2]; + const GLfloat a = ctx->Accum.ClearColor[3]; + GLfloat *accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + int i; + +#if 1 + GLvoid *map; + + map = pipe_surface_map(acc_ps); + switch (acc_strb->format) { + case PIPE_FORMAT_R16G16B16A16_SNORM: + { + GLshort r = FLOAT_TO_SHORT(ctx->Accum.ClearColor[0]); + GLshort g = FLOAT_TO_SHORT(ctx->Accum.ClearColor[1]); + GLshort b = FLOAT_TO_SHORT(ctx->Accum.ClearColor[2]); + GLshort a = FLOAT_TO_SHORT(ctx->Accum.ClearColor[3]); + int i, j; + for (i = 0; i < height; i++) { + GLshort *dst = ((GLshort *) map + + ((ypos + i) * acc_ps->pitch + xpos) * 4); + for (j = 0; j < width; j++) { + dst[0] = r; + dst[1] = g; + dst[2] = b; + dst[3] = a; + dst += 4; + } + } + } + break; + default: + _mesa_problem(ctx, "unexpected format in st_clear_accum_buffer()"); + } + + pipe_surface_unmap(acc_ps); + +#else + for (i = 0; i < width * height; i++) { + accBuf[i*4+0] = r; + accBuf[i*4+1] = g; + accBuf[i*4+2] = b; + accBuf[i*4+3] = a; + } + + acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); +#endif +} + + +/** For ADD/MULT */ +static void +accum_mad(struct pipe_context *pipe, GLfloat scale, GLfloat bias, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps) +{ + GLfloat *accBuf; + GLint i; + + accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + for (i = 0; i < 4 * width * height; i++) { + accBuf[i] = accBuf[i] * scale + bias; + } + + pipe_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + free(accBuf); +} + + +static void +accum_accum(struct pipe_context *pipe, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps, + struct pipe_surface *color_ps) +{ + GLfloat *colorBuf, *accBuf; + GLint i; + + colorBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, colorBuf); + acc_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + for (i = 0; i < 4 * width * height; i++) { + accBuf[i] = accBuf[i] + colorBuf[i] * value; + } + + acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + free(colorBuf); + free(accBuf); +} + + +static void +accum_load(struct pipe_context *pipe, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps, + struct pipe_surface *color_ps) +{ + GLfloat *buf; + GLint i; + + buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, buf); + + for (i = 0; i < 4 * width * height; i++) { + buf[i] = buf[i] * value; + } + + acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, buf); + + free(buf); +} + + +static void +accum_return(GLcontext *ctx, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps, + struct pipe_surface *color_ps) +{ + struct pipe_context *pipe = ctx->st->pipe; + const GLubyte *colormask = ctx->Color.ColorMask; + GLfloat *abuf, *cbuf = NULL; + GLint i, ch; + + abuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + acc_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, abuf); + + if (!colormask[0] || !colormask[1] || !colormask[2] || !colormask[3]) { + cbuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, cbuf); + } + + for (i = 0; i < width * height; i++) { + for (ch = 0; ch < 4; ch++) { + if (colormask[ch]) { + GLfloat val = abuf[i * 4 + ch] * value; + abuf[i * 4 + ch] = CLAMP(val, 0.0, 1.0); + } + else { + abuf[i * 4 + ch] = cbuf[i * 4 + ch]; + } + } + } + + pipe_put_tile_rgba(pipe, color_ps, xpos, ypos, width, height, abuf); + + free(abuf); + if (cbuf) + free(cbuf); +} + + +static void +st_Accum(GLcontext *ctx, GLenum op, GLfloat value) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + struct st_renderbuffer *acc_strb + = st_renderbuffer(ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); + struct st_renderbuffer *color_strb + = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + struct pipe_surface *acc_ps = acc_strb->surface; + struct pipe_surface *color_ps = color_strb->surface; + + const GLint xpos = ctx->DrawBuffer->_Xmin; + const GLint ypos = ctx->DrawBuffer->_Ymin; + const GLint width = ctx->DrawBuffer->_Xmax - xpos; + const GLint height = ctx->DrawBuffer->_Ymax - ypos; + + /* make sure color bufs aren't cached */ + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + + switch (op) { + case GL_ADD: + if (value != 0.0F) { + accum_mad(pipe, 1.0, value, xpos, ypos, width, height, acc_ps); + } + break; + case GL_MULT: + if (value != 1.0F) { + accum_mad(pipe, value, 0.0, xpos, ypos, width, height, acc_ps); + } + break; + case GL_ACCUM: + if (value != 0.0F) { + accum_accum(pipe, value, xpos, ypos, width, height, acc_ps, color_ps); + } + break; + case GL_LOAD: + accum_load(pipe, value, xpos, ypos, width, height, acc_ps, color_ps); + break; + case GL_RETURN: + accum_return(ctx, value, xpos, ypos, width, height, acc_ps, color_ps); + break; + default: + assert(0); + } +} + + + +void st_init_accum_functions(struct dd_function_table *functions) +{ + functions->Accum = st_Accum; +} diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index a992e08ff6..cf3a99e7e9 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -42,7 +42,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index d5696a909f..a0c305d66f 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -50,7 +50,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_draw_quad.h" #include "util/u_simple_shaders.h" #include "shader/prog_instruction.h" diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 0c5e21d4ff..4ec7c752df 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -55,7 +55,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_draw_quad.h" #include "shader/prog_instruction.h" #include "cso_cache/cso_context.h" diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 39f5856f94..c801532788 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -41,7 +41,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "st_context.h" #include "st_cb_bitmap.h" #include "st_cb_readpixels.h" diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 6177ac63f0..16bbf3d80f 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -51,7 +51,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_blit.h" diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 325d95e865..936a6e32ea 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -55,7 +55,7 @@ #define TGSI_DEBUG 0 -/** XXX we should use the version of this from p_util.h but including +/** XXX we should use the version of this from u_memory.h but including * that header causes symbol collisions. */ static INLINE void * diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 63046a0ecc..73cebff33f 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -36,7 +36,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "util/u_rect.h" -- cgit v1.2.3