diff options
Diffstat (limited to 'src/mesa/pipe/cell/spu/spu_exec.c')
-rw-r--r-- | src/mesa/pipe/cell/spu/spu_exec.c | 1948 |
1 files changed, 0 insertions, 1948 deletions
diff --git a/src/mesa/pipe/cell/spu/spu_exec.c b/src/mesa/pipe/cell/spu/spu_exec.c deleted file mode 100644 index e51008b9b3..0000000000 --- a/src/mesa/pipe/cell/spu/spu_exec.c +++ /dev/null @@ -1,1948 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI interpretor/executor. - * - * Flow control information: - * - * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) - * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special - * care since a condition may be true for some quad components but false - * for other components. - * - * We basically execute all statements (even if they're in the part of - * an IF/ELSE clause that's "not taken") and use a special mask to - * control writing to destination registers. This is the ExecMask. - * See store_dest(). - * - * The ExecMask is computed from three other masks (CondMask, LoopMask and - * ContMask) which are controlled by the flow control instructions (namely: - * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). - * - * - * Authors: - * Michal Krol - * Brian Paul - */ - -#include <libmisc.h> -#include <spu_mfcio.h> -#include <transpose_matrix4x4.h> -#include <simdmath/ceilf4.h> -#include <simdmath/cosf4.h> -#include <simdmath/divf4.h> -#include <simdmath/floorf4.h> -#include <simdmath/log2f4.h> -#include <simdmath/powf4.h> -#include <simdmath/sinf4.h> -#include <simdmath/sqrtf4.h> -#include <simdmath/truncf4.h> - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" -#include "spu_exec.h" -#include "spu_main.h" -#include "spu_vertex_shader.h" - -#define TILE_TOP_LEFT 0 -#define TILE_TOP_RIGHT 1 -#define TILE_BOTTOM_LEFT 2 -#define TILE_BOTTOM_RIGHT 3 - -/* - * Shorthand locations of various utility registers (_I = Index, _C = Channel) - */ -#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I -#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C -#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I -#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C -#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I -#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C -#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I -#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C -#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I -#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C -#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I -#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C -#define TEMP_128_I TGSI_EXEC_TEMP_128_I -#define TEMP_128_C TGSI_EXEC_TEMP_128_C -#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I -#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C -#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I -#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C -#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I -#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C -#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I -#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C -#define TEMP_R0 TGSI_EXEC_TEMP_R0 - -#define FOR_EACH_CHANNEL(CHAN)\ - for (CHAN = 0; CHAN < 4; CHAN++) - -#define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) - -#define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) - -#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED2( INST, CHAN )) - - -/** The execution mask depends on the conditional mask and the loop mask */ -#define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask - - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - - - -/** - * Initialize machine state by expanding tokens to full instructions, - * allocating temporary storage, setting up constants, etc. - * After this, we can call spu_exec_machine_run() many times. - */ -void -spu_exec_machine_init(struct spu_exec_machine *mach, - uint numSamplers, - struct spu_sampler *samplers, - unsigned processor) -{ - qword zero; - qword not_zero; - uint i; - - mach->Samplers = samplers; - mach->Processor = processor; - mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; - - zero = si_xor(zero, zero); - not_zero = si_xori(zero, 0xff); - - /* Setup constants. */ - mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; - mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; - mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); - mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); - - mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); - mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); - mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); - mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); -} - - -static INLINE qword -micro_abs(qword src) -{ - return si_rotmi(si_shli(src, 1), -1); -} - -static INLINE qword -micro_ceil(qword src) -{ - return (qword) _ceilf4((vec_float4) src); -} - -static INLINE qword -micro_cos(qword src) -{ - return (qword) _cosf4((vec_float4) src); -} - -static const qword br_shuf = { - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, -}; - -static const qword bl_shuf = { - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, -}; - -static const qword tl_shuf = { - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, -}; - -static qword -micro_ddx(qword src) -{ - qword bottom_right = si_shufb(src, src, br_shuf); - qword bottom_left = si_shufb(src, src, bl_shuf); - - return si_fs(bottom_right, bottom_left); -} - -static qword -micro_ddy(qword src) -{ - qword top_left = si_shufb(src, src, tl_shuf); - qword bottom_left = si_shufb(src, src, bl_shuf); - - return si_fs(top_left, bottom_left); -} - -static INLINE qword -micro_div(qword src0, qword src1) -{ - return (qword) _divf4((vec_float4) src0, (vec_float4) src1); -} - -static qword -micro_flr(qword src) -{ - return (qword) _floorf4((vec_float4) src); -} - -static qword -micro_frc(qword src) -{ - return si_fs(src, (qword) _floorf4((vec_float4) src)); -} - -static INLINE qword -micro_ge(qword src0, qword src1) -{ - return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); -} - -static qword -micro_lg2(qword src) -{ - return (qword) _log2f4((vec_float4) src); -} - -static INLINE qword -micro_lt(qword src0, qword src1) -{ - const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); - - return si_xori(tmp, 0xff); -} - -static INLINE qword -micro_max(qword src0, qword src1) -{ - return si_selb(src1, src0, si_fcgt(src0, src1)); -} - -static INLINE qword -micro_min(qword src0, qword src1) -{ - return si_selb(src0, src1, si_fcgt(src0, src1)); -} - -static qword -micro_neg(qword src) -{ - return si_xor(src, (qword) spu_splats(0x80000000)); -} - -static qword -micro_set_sign(qword src) -{ - return si_or(src, (qword) spu_splats(0x80000000)); -} - -static qword -micro_pow(qword src0, qword src1) -{ - return (qword) _powf4((vec_float4) src0, (vec_float4) src1); -} - -static qword -micro_rnd(qword src) -{ - const qword half = (qword) spu_splats(0.5f); - - /* May be able to use _roundf4. There may be some difference, though. - */ - return (qword) _floorf4((vec_float4) si_fa(src, half)); -} - -static INLINE qword -micro_ishr(qword src0, qword src1) -{ - return si_rotma(src0, si_sfi(src1, 0)); -} - -static qword -micro_trunc(qword src) -{ - return (qword) _truncf4((vec_float4) src); -} - -static qword -micro_sin(qword src) -{ - return (qword) _sinf4((vec_float4) src); -} - -static INLINE qword -micro_sqrt(qword src) -{ - return (qword) _sqrtf4((vec_float4) src); -} - -static void -fetch_src_file_channel( - const struct spu_exec_machine *mach, - const uint file, - const uint swizzle, - const union spu_exec_channel *index, - union spu_exec_channel *chan ) -{ - switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch( file ) { - case TGSI_FILE_CONSTANT: { - unsigned char buffer[32] ALIGN16_ATTRIB; - unsigned i; - - for (i = 0; i < 4; i++) { - const float *ptr = mach->Consts[index->i[i]]; - const uint64_t addr = (uint64_t)(uintptr_t) ptr; - const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; - - mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); - wait_on_mask(1 << TAG_VERTEX_BUFFER); - - (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f) - + (sizeof(float) * swizzle)], sizeof(float)); - } - break; - } - - case TGSI_FILE_INPUT: - chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_TEMPORARY: - chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - assert( index->i[1] < (int) mach->ImmLimit ); - assert( index->i[2] < (int) mach->ImmLimit ); - assert( index->i[3] < (int) mach->ImmLimit ); - - chan->f[0] = mach->Imms[index->i[0]][swizzle]; - chan->f[1] = mach->Imms[index->i[1]][swizzle]; - chan->f[2] = mach->Imms[index->i[2]][swizzle]; - chan->f[3] = mach->Imms[index->i[3]][swizzle]; - break; - - case TGSI_FILE_ADDRESS: - chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_OUTPUT: - /* vertex/fragment output vars can be read too */ - chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - default: - assert( 0 ); - } - break; - - case TGSI_EXTSWIZZLE_ZERO: - *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; - break; - - case TGSI_EXTSWIZZLE_ONE: - *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; - break; - - default: - assert( 0 ); - } -} - -static void -fetch_source( - const struct spu_exec_machine *mach, - union spu_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) -{ - union spu_exec_channel index; - uint swizzle; - - index.i[0] = - index.i[1] = - index.i[2] = - index.i[3] = reg->SrcRegister.Index; - - if (reg->SrcRegister.Indirect) { - union spu_exec_channel index2; - union spu_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; - - swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, - CHAN_X); - fetch_src_file_channel( - mach, - reg->SrcRegisterInd.File, - swizzle, - &index2, - &indir_index ); - - index.q = si_a(index.q, indir_index.q); - } - - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { - case TGSI_FILE_INPUT: - index.q = si_mpyi(index.q, 17); - break; - case TGSI_FILE_CONSTANT: - index.q = si_shli(index.q, 12); - break; - default: - assert( 0 ); - } - - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; - - if (reg->SrcRegisterDim.Indirect) { - union spu_exec_channel index2; - union spu_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; - - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); - fetch_src_file_channel( - mach, - reg->SrcRegisterDimInd.File, - swizzle, - &index2, - &indir_index ); - - index.q = si_a(index.q, indir_index.q); - } - } - - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - fetch_src_file_channel( - mach, - reg->SrcRegister.File, - swizzle, - &index, - chan ); - - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - chan->q = micro_abs(chan->q); - break; - - case TGSI_UTIL_SIGN_SET: - chan->q = micro_set_sign(chan->q); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - chan->q = micro_neg(chan->q); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } - - if (reg->SrcRegisterExtMod.Complement) { - chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); - } -} - -static void -store_dest( - struct spu_exec_machine *mach, - const union spu_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) -{ - union spu_exec_channel *dst; - - switch( reg->DstRegister.File ) { - case TGSI_FILE_NULL: - return; - - case TGSI_FILE_OUTPUT: - dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_TEMPORARY: - dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_ADDRESS: - dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; - break; - - default: - assert( 0 ); - return; - } - - switch (inst->Instruction.Saturate) - { - case TGSI_SAT_NONE: - if (mach->ExecMask & 0x1) - dst->i[0] = chan->i[0]; - if (mach->ExecMask & 0x2) - dst->i[1] = chan->i[1]; - if (mach->ExecMask & 0x4) - dst->i[2] = chan->i[2]; - if (mach->ExecMask & 0x8) - dst->i[3] = chan->i[3]; - break; - - case TGSI_SAT_ZERO_ONE: - /* XXX need to obey ExecMask here */ - dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - - default: - assert( 0 ); - } -} - -#define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) - -#define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) - - -/** - * Execute ARB-style KIL which is predicated by a src register. - * Kill fragment if any of the four values is less than zero. - */ -static void -exec_kilp(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - uint uniquemask; - uint chan_index; - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - union spu_exec_channel r[1]; - - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); - - for (chan_index = 0; chan_index < 4; chan_index++) - { - uint swizzle; - uint i; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle ( - &inst->FullSrcRegisters[0], - chan_index); - - /* check if the component has not been already tested */ - if (uniquemask & (1 << swizzle)) - continue; - uniquemask |= 1 << swizzle; - - FETCH(&r[0], 0, chan_index); - for (i = 0; i < 4; i++) - if (r[0].f[i] < 0.0f) - kilmask |= 1 << i; - } - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; -} - - -/* - * Fetch a texel using STR texture coordinates. - */ -static void -fetch_texel( struct spu_sampler *sampler, - const union spu_exec_channel *s, - const union spu_exec_channel *t, - const union spu_exec_channel *p, - float lodbias, /* XXX should be float[4] */ - union spu_exec_channel *r, - union spu_exec_channel *g, - union spu_exec_channel *b, - union spu_exec_channel *a ) -{ - qword rgba[4]; - qword out[4]; - - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); - - _transpose_matrix4x4(out, rgba); - r->q = out[0]; - g->q = out[1]; - b->q = out[2]; - a->q = out[3]; -} - - -static void -exec_tex(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst, - boolean biasLod) -{ - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; - union spu_exec_channel r[8]; - uint chan_index; - float lodBias; - - /* printf("Sampler %u unit %u\n", sampler, unit); */ - - switch (inst->InstructionExtTexture.Texture) { - case TGSI_TEXTURE_1D: - - FETCH(&r[0], 0, CHAN_X); - - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: - FETCH(&r[1], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[1].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); - } - - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ - break; - - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: - FETCH(&r[3], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[3].q); - r[1].q = micro_div(r[1].q, r[3].q); - r[2].q = micro_div(r[2].q, r[3].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ - break; - - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: - FETCH(&r[3], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[3].q); - r[1].q = micro_div(r[1].q, r[3].q); - r[2].q = micro_div(r[2].q, r[3].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, - &r[0], &r[1], &r[2], &r[3]); - break; - - default: - assert (0); - } - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } -} - - - -static void -constant_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; - } -} - -static void -linear_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0; - mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; - mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; - mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; -} - -static void -perspective_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - const float *w = mach->QuadPos.xyzw[3].f; - /* divide by W here */ - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; - mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; - mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; -} - - -typedef void (* interpolation_func)( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ); - -static void -exec_declaration(struct spu_exec_machine *mach, - const struct tgsi_full_declaration *decl) -{ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - interpolation_func interp; - - assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; - mask = decl->Declaration.UsageMask; - - switch( decl->Interpolation.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - interp = constant_interpolation; - break; - - case TGSI_INTERPOLATE_LINEAR: - interp = linear_interpolation; - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - interp = perspective_interpolation; - break; - - default: - assert( 0 ); - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - interp( mach, i, j ); - } - } - } - else { - unsigned i, j; - - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - interp( mach, i, j ); - } - } - } - } - } - } -} - -static void -exec_instruction( - struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst, - int *pc ) -{ - uint chan_index; - union spu_exec_channel r[8]; - - (*pc)++; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_cflts(r[0].q, 0); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOV: - /* TGSI_OPCODE_SWZ */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[1], 0, CHAN_Y ); - r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - - FETCH( &r[2], 0, CHAN_W ); - r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); - r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); - r[1].q = micro_pow(r[1].q, r[2].q); - - /* r0 = (r0 > 0.0) ? r1 : 0.0 - */ - r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, - r[0].q); - STORE( &r[0], 0, CHAN_Z ); - } - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_sqrt(r[0].q); - r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXP: - assert (0); - break; - - case TGSI_OPCODE_LOG: - assert (0); - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = si_fm(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_fa(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - FETCH( &r[0], 0, CHAN_Y ); - FETCH( &r[1], 1, CHAN_Y); - r[0].q = si_fm(r[0].q, r[1].q); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = micro_min(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = micro_max(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = micro_ge(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = micro_ge(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - FETCH( &r[2], 2, chan_index ); - r[0].q = si_fma(r[0].q, r[1].q, r[2].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = si_fs(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - r[1].q = si_fs(r[1].q, r[2].q); - r[0].q = si_fma(r[0].q, r[1].q, r[2].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_CND: - assert (0); - break; - - case TGSI_OPCODE_CND0: - assert (0); - break; - - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ - assert (0); - break; - - case TGSI_OPCODE_INDEX: - assert (0); - break; - - case TGSI_OPCODE_NEGATE: - assert (0); - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_frc(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CLAMP: - assert (0); - break; - - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_flr(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_ROUND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_rnd(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ - FETCH(&r[0], 0, CHAN_X); - - r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_lg2(r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = micro_pow(r[0].q, r[1].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ - FETCH(&r[0], 0, CHAN_Y); - FETCH(&r[1], 1, CHAN_Z); - FETCH(&r[3], 0, CHAN_Z); - FETCH(&r[4], 1, CHAN_Y); - - /* r2 = (r0 * r1) - (r3 * r5) - */ - r[2].q = si_fm(r[3].q, r[5].q); - r[2].q = si_fms(r[0].q, r[1].q, r[2].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } - - FETCH(&r[2], 1, CHAN_X); - FETCH(&r[5], 0, CHAN_X); - - /* r3 = (r3 * r2) - (r1 * r5) - */ - r[1].q = si_fm(r[1].q, r[5].q); - r[3].q = si_fms(r[3].q, r[2].q, r[1].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } - - /* r5 = (r5 * r4) - (r0 * r2) - */ - r[0].q = si_fm(r[0].q, r[2].q); - r[5].q = si_fms(r[5].q, r[4].q, r[0].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MULTIPLYMATRIX: - assert (0); - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - - r[0].q = micro_abs(r[0].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_RCC: - assert (0); - break; - - case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 1, CHAN_W); - - r[0].q = si_fa(r[0].q, r[1].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - r[0].q = micro_cos(r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ddx(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ddy(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_KILP: - exec_kilp (mach, inst); - break; - - case TGSI_OPCODE_KIL: - /* for enabled ExecMask bits, set the killed bit */ - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; - break; - - case TGSI_OPCODE_PK2H: - assert (0); - break; - - case TGSI_OPCODE_PK2US: - assert (0); - break; - - case TGSI_OPCODE_PK4B: - assert (0); - break; - - case TGSI_OPCODE_PK4UB: - assert (0); - break; - - case TGSI_OPCODE_RFL: - assert (0); - break; - - case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fceq(r[0].q, r[1].q); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SFL: - assert (0); - break; - - case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_fcgt(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_sin(r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fcgt(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fceq(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_STR: - assert (0); - break; - - case TGSI_OPCODE_TEX: - /* simple texture lookup */ - /* src[0] = texcoord */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE); - break; - - case TGSI_OPCODE_TXB: - /* Texture lookup with lod bias */ - /* src[0] = texcoord (src[0].w = load bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); - break; - - case TGSI_OPCODE_TXD: - /* Texture lookup with explict partial derivatives */ - /* src[0] = texcoord */ - /* src[1] = d[strq]/dx */ - /* src[2] = d[strq]/dy */ - /* src[3] = sampler unit */ - assert (0); - break; - - case TGSI_OPCODE_TXL: - /* Texture lookup with explit LOD */ - /* src[0] = texcoord (src[0].w = load bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); - break; - - case TGSI_OPCODE_UP2H: - assert (0); - break; - - case TGSI_OPCODE_UP2US: - assert (0); - break; - - case TGSI_OPCODE_UP4B: - assert (0); - break; - - case TGSI_OPCODE_UP4UB: - assert (0); - break; - - case TGSI_OPCODE_X2D: - assert (0); - break; - - case TGSI_OPCODE_ARA: - assert (0); - break; - - case TGSI_OPCODE_ARR: - assert (0); - break; - - case TGSI_OPCODE_BRA: - assert (0); - break; - - case TGSI_OPCODE_CAL: - /* skip the call if no execution channels are enabled */ - if (mach->ExecMask) { - /* do the call */ - - /* push the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); - mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; - *pc = inst->InstructionExtLabel.Label; - } - break; - - case TGSI_OPCODE_RET: - mach->FuncMask &= ~mach->ExecMask; - UPDATE_EXEC_MASK(mach); - - if (mach->ExecMask == 0x0) { - /* really return now (otherwise, keep executing */ - - if (mach->CallStackTop == 0) { - /* returning from main() */ - *pc = -1; - return; - } - *pc = mach->CallStack[--mach->CallStackTop]; - - /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->FuncStackTop > 0); - mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; - - UPDATE_EXEC_MASK(mach); - } - break; - - case TGSI_OPCODE_SSG: - assert (0); - break; - - case TGSI_OPCODE_CMP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - /* r0 = (r0 < 0.0) ? r1 : r2 - */ - r[3].q = si_xor(r[3].q, r[3].q); - r[0].q = micro_lt(r[0].q, r[3].q); - r[0].q = si_selb(r[1].q, r[2].q, r[0].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_SCS: - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( &r[0], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - r[1].q = micro_cos(r[0].q); - STORE( &r[1], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - r[1].q = micro_sin(r[0].q); - STORE( &r[1], 0, CHAN_Y ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_NRM: - assert (0); - break; - - case TGSI_OPCODE_DIV: - assert( 0 ); - break; - - case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_IF: - /* push CondMask */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - FETCH( &r[0], 0, CHAN_X ); - /* update CondMask */ - if( ! r[0].u[0] ) { - mach->CondMask &= ~0x1; - } - if( ! r[0].u[1] ) { - mach->CondMask &= ~0x2; - } - if( ! r[0].u[2] ) { - mach->CondMask &= ~0x4; - } - if( ! r[0].u[3] ) { - mach->CondMask &= ~0x8; - } - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ELSE */ - break; - - case TGSI_OPCODE_ELSE: - /* invert CondMask wrt previous mask */ - { - uint prevMask; - assert(mach->CondStackTop > 0); - prevMask = mach->CondStack[mach->CondStackTop - 1]; - mach->CondMask = ~mach->CondMask & prevMask; - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ENDIF */ - } - break; - - case TGSI_OPCODE_ENDIF: - /* pop CondMask */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_END: - /* halt execution */ - *pc = -1; - break; - - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - - case TGSI_OPCODE_PUSHA: - assert (0); - break; - - case TGSI_OPCODE_POPA: - assert (0); - break; - - case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ceil(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_csflt(r[0].q, 0); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_xorbi(r[0].q, 0xff); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_trunc(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_shl(r[0].q, r[1].q); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = micro_ishr(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_and(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_or(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOD: - assert (0); - break; - - case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_xor(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SAD: - assert (0); - break; - - case TGSI_OPCODE_TXF: - assert (0); - break; - - case TGSI_OPCODE_TXQ: - assert (0); - break; - - case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; - break; - - case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; - break; - - case TGSI_OPCODE_LOOP: - /* fall-through (for now) */ - case TGSI_OPCODE_BGNLOOP2: - /* push LoopMask and ContMasks */ - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - break; - - case TGSI_OPCODE_ENDLOOP: - /* fall-through (for now at least) */ - case TGSI_OPCODE_ENDLOOP2: - /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - if (mach->LoopMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - *pc = inst->InstructionExtLabel.Label + 1; - } - else { - /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_CONT: - /* turn off cont channels for each enabled exec channel */ - mach->ContMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BGNSUB: - /* no-op */ - break; - - case TGSI_OPCODE_ENDSUB: - /* no-op */ - break; - - case TGSI_OPCODE_NOISE1: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE2: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE3: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE4: - assert( 0 ); - break; - - case TGSI_OPCODE_NOP: - break; - - default: - assert( 0 ); - } -} - - -/** - * Run TGSI interpreter. - * \return bitmask of "alive" quad components - */ -uint -spu_exec_machine_run( struct spu_exec_machine *mach ) -{ - uint i; - int pc = 0; - - mach->CondMask = 0xf; - mach->LoopMask = 0xf; - mach->ContMask = 0xf; - mach->FuncMask = 0xf; - mach->ExecMask = 0xf; - - mach->CondStackTop = 0; /* temporarily subvert this assertion */ - assert(mach->CondStackTop == 0); - assert(mach->LoopStackTop == 0); - assert(mach->ContStackTop == 0); - assert(mach->CallStackTop == 0); - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; - - if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; - mach->Primitives[0] = 0; - } - - - /* execute declarations (interpolants) */ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - for (i = 0; i < mach->NumDeclarations; i++) { - uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB; - struct tgsi_full_declaration decl; - unsigned long decl_addr = (unsigned long) (mach->Declarations+i); - unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f); - - mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); - wait_on_mask(1 << TAG_INSTRUCTION_FETCH); - - memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl)); - exec_declaration( mach, &decl ); - } - } - - /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB; - struct tgsi_full_instruction inst; - unsigned long inst_addr = (unsigned long) (mach->Instructions + pc); - unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f); - - assert(pc < mach->NumInstructions); - mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); - wait_on_mask(1 << TAG_INSTRUCTION_FETCH); - - memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst)); - exec_instruction( mach, & inst, &pc ); - } - -#if 0 - /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ - if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - /* - * Scale back depth component. - */ - for (i = 0; i < 4; i++) - mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; - } -#endif - - return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; -} - - |