summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi/exec
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi/exec')
-rw-r--r--src/gallium/auxiliary/tgsi/exec/Makefile2
-rw-r--r--src/gallium/auxiliary/tgsi/exec/tgsi_exec.c2522
-rw-r--r--src/gallium/auxiliary/tgsi/exec/tgsi_exec.h253
-rwxr-xr-xsrc/gallium/auxiliary/tgsi/exec/tgsi_sse2.c2275
-rwxr-xr-xsrc/gallium/auxiliary/tgsi/exec/tgsi_sse2.h49
5 files changed, 0 insertions, 5101 deletions
diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile
deleted file mode 100644
index 451911a354..0000000000
--- a/src/gallium/auxiliary/tgsi/exec/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-default:
- cd .. ; make
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
deleted file mode 100644
index 001a4c4b15..0000000000
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ /dev/null
@@ -1,2522 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * TGSI interpretor/executor.
- *
- * Flow control information:
- *
- * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
- * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
- * care since a condition may be true for some quad components but false
- * for other components.
- *
- * We basically execute all statements (even if they're in the part of
- * an IF/ELSE clause that's "not taken") and use a special mask to
- * control writing to destination registers. This is the ExecMask.
- * See store_dest().
- *
- * The ExecMask is computed from three other masks (CondMask, LoopMask and
- * ContMask) which are controlled by the flow control instructions (namely:
- * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
- *
- *
- * Authors:
- * Michal Krol
- * Brian Paul
- */
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi_exec.h"
-
-#define TILE_TOP_LEFT 0
-#define TILE_TOP_RIGHT 1
-#define TILE_BOTTOM_LEFT 2
-#define TILE_BOTTOM_RIGHT 3
-
-/*
- * Shorthand locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
-#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
-#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
-#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
-#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
-#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
-#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
-#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
-#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
-#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
-#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
-#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
-#define TEMP_128_I TGSI_EXEC_TEMP_128_I
-#define TEMP_128_C TGSI_EXEC_TEMP_128_C
-#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
-#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
-#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
-#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
-#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
-#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
-#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
-#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
-#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
-#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
-#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
-#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
-#define TEMP_R0 TGSI_EXEC_TEMP_R0
-
-#define FOR_EACH_CHANNEL(CHAN)\
- for (CHAN = 0; CHAN < 4; CHAN++)
-
-#define IS_CHANNEL_ENABLED(INST, CHAN)\
- ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
-
-#define IS_CHANNEL_ENABLED2(INST, CHAN)\
- ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
-
-#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
- FOR_EACH_CHANNEL( CHAN )\
- if (IS_CHANNEL_ENABLED( INST, CHAN ))
-
-#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
- FOR_EACH_CHANNEL( CHAN )\
- if (IS_CHANNEL_ENABLED2( INST, CHAN ))
-
-
-/** The execution mask depends on the conditional mask and the loop mask */
-#define UPDATE_EXEC_MASK(MACH) \
- MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
-
-
-#define CHAN_X 0
-#define CHAN_Y 1
-#define CHAN_Z 2
-#define CHAN_W 3
-
-
-
-/**
- * Initialize machine state by expanding tokens to full instructions,
- * allocating temporary storage, setting up constants, etc.
- * After this, we can call tgsi_exec_machine_run() many times.
- */
-void
-tgsi_exec_machine_bind_shader(
- struct tgsi_exec_machine *mach,
- const struct tgsi_token *tokens,
- uint numSamplers,
- struct tgsi_sampler *samplers)
-{
- uint k;
- struct tgsi_parse_context parse;
- struct tgsi_exec_labels *labels = &mach->Labels;
- struct tgsi_full_instruction *instructions;
- struct tgsi_full_declaration *declarations;
- uint maxInstructions = 10, numInstructions = 0;
- uint maxDeclarations = 10, numDeclarations = 0;
- uint instno = 0;
-
-#if 0
- tgsi_dump(tokens, 0);
-#endif
-
- mach->Tokens = tokens;
- mach->Samplers = samplers;
-
- k = tgsi_parse_init (&parse, mach->Tokens);
- if (k != TGSI_PARSE_OK) {
- debug_printf( "Problem parsing!\n" );
- return;
- }
-
- mach->Processor = parse.FullHeader.Processor.Processor;
- mach->ImmLimit = 0;
- labels->count = 0;
-
- declarations = (struct tgsi_full_declaration *)
- MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
-
- if (!declarations) {
- return;
- }
-
- instructions = (struct tgsi_full_instruction *)
- MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
-
- if (!instructions) {
- FREE( declarations );
- return;
- }
-
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
- uint pointer = parse.Position;
- uint i;
-
- tgsi_parse_token( &parse );
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- /* save expanded declaration */
- if (numDeclarations == maxDeclarations) {
- declarations = REALLOC(declarations,
- maxDeclarations
- * sizeof(struct tgsi_full_declaration),
- (maxDeclarations + 10)
- * sizeof(struct tgsi_full_declaration));
- maxDeclarations += 10;
- }
- memcpy(declarations + numDeclarations,
- &parse.FullToken.FullDeclaration,
- sizeof(declarations[0]));
- numDeclarations++;
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- {
- uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
- assert( size % 4 == 0 );
- assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
-
- for( i = 0; i < size; i++ ) {
- mach->Imms[mach->ImmLimit + i / 4][i % 4] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
- }
- mach->ImmLimit += size / 4;
- }
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- assert( labels->count < MAX_LABELS );
-
- labels->labels[labels->count][0] = instno;
- labels->labels[labels->count][1] = pointer;
- labels->count++;
-
- /* save expanded instruction */
- if (numInstructions == maxInstructions) {
- instructions = REALLOC(instructions,
- maxInstructions
- * sizeof(struct tgsi_full_instruction),
- (maxInstructions + 10)
- * sizeof(struct tgsi_full_instruction));
- maxInstructions += 10;
- }
- memcpy(instructions + numInstructions,
- &parse.FullToken.FullInstruction,
- sizeof(instructions[0]));
- numInstructions++;
- break;
-
- default:
- assert( 0 );
- }
- }
- tgsi_parse_free (&parse);
-
- if (mach->Declarations) {
- FREE( mach->Declarations );
- }
- mach->Declarations = declarations;
- mach->NumDeclarations = numDeclarations;
-
- if (mach->Instructions) {
- FREE( mach->Instructions );
- }
- mach->Instructions = instructions;
- mach->NumInstructions = numInstructions;
-}
-
-
-void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach )
-{
- uint i;
-
- mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
- mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
-
- /* Setup constants. */
- for( i = 0; i < 4; i++ ) {
- mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
- mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
- mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
- mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
- mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
- mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
- mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
- mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
- mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
- mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
- }
-}
-
-
-void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
-{
- if (mach->Instructions) {
- FREE(mach->Instructions);
- mach->Instructions = NULL;
- mach->NumInstructions = 0;
- }
- if (mach->Declarations) {
- FREE(mach->Declarations);
- mach->Declarations = NULL;
- mach->NumDeclarations = 0;
- }
-}
-
-
-static void
-micro_abs(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = fabsf( src->f[0] );
- dst->f[1] = fabsf( src->f[1] );
- dst->f[2] = fabsf( src->f[2] );
- dst->f[3] = fabsf( src->f[3] );
-}
-
-static void
-micro_add(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] + src1->f[0];
- dst->f[1] = src0->f[1] + src1->f[1];
- dst->f[2] = src0->f[2] + src1->f[2];
- dst->f[3] = src0->f[3] + src1->f[3];
-}
-
-static void
-micro_iadd(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] + src1->i[0];
- dst->i[1] = src0->i[1] + src1->i[1];
- dst->i[2] = src0->i[2] + src1->i[2];
- dst->i[3] = src0->i[3] + src1->i[3];
-}
-
-static void
-micro_and(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] & src1->u[0];
- dst->u[1] = src0->u[1] & src1->u[1];
- dst->u[2] = src0->u[2] & src1->u[2];
- dst->u[3] = src0->u[3] & src1->u[3];
-}
-
-static void
-micro_ceil(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = ceilf( src->f[0] );
- dst->f[1] = ceilf( src->f[1] );
- dst->f[2] = ceilf( src->f[2] );
- dst->f[3] = ceilf( src->f[3] );
-}
-
-static void
-micro_cos(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = cosf( src->f[0] );
- dst->f[1] = cosf( src->f[1] );
- dst->f[2] = cosf( src->f[2] );
- dst->f[3] = cosf( src->f[3] );
-}
-
-static void
-micro_ddx(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] =
- dst->f[1] =
- dst->f[2] =
- dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
-}
-
-static void
-micro_ddy(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] =
- dst->f[1] =
- dst->f[2] =
- dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
-}
-
-static void
-micro_div(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] / src1->f[0];
- dst->f[1] = src0->f[1] / src1->f[1];
- dst->f[2] = src0->f[2] / src1->f[2];
- dst->f[3] = src0->f[3] / src1->f[3];
-}
-
-static void
-micro_udiv(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] / src1->u[0];
- dst->u[1] = src0->u[1] / src1->u[1];
- dst->u[2] = src0->u[2] / src1->u[2];
- dst->u[3] = src0->u[3] / src1->u[3];
-}
-
-static void
-micro_eq(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_ieq(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
- dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
- dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
- dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
-}
-
-static void
-micro_exp2(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
-{
- dst->f[0] = powf( 2.0f, src->f[0] );
- dst->f[1] = powf( 2.0f, src->f[1] );
- dst->f[2] = powf( 2.0f, src->f[2] );
- dst->f[3] = powf( 2.0f, src->f[3] );
-}
-
-static void
-micro_f2it(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->i[0] = (int) src->f[0];
- dst->i[1] = (int) src->f[1];
- dst->i[2] = (int) src->f[2];
- dst->i[3] = (int) src->f[3];
-}
-
-static void
-micro_f2ut(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->u[0] = (uint) src->f[0];
- dst->u[1] = (uint) src->f[1];
- dst->u[2] = (uint) src->f[2];
- dst->u[3] = (uint) src->f[3];
-}
-
-static void
-micro_flr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = floorf( src->f[0] );
- dst->f[1] = floorf( src->f[1] );
- dst->f[2] = floorf( src->f[2] );
- dst->f[3] = floorf( src->f[3] );
-}
-
-static void
-micro_frc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = src->f[0] - floorf( src->f[0] );
- dst->f[1] = src->f[1] - floorf( src->f[1] );
- dst->f[2] = src->f[2] - floorf( src->f[2] );
- dst->f[3] = src->f[3] - floorf( src->f[3] );
-}
-
-static void
-micro_ge(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_i2f(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) src->i[0];
- dst->f[1] = (float) src->i[1];
- dst->f[2] = (float) src->i[2];
- dst->f[3] = (float) src->i[3];
-}
-
-static void
-micro_lg2(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = logf( src->f[0] ) * 1.442695f;
- dst->f[1] = logf( src->f[1] ) * 1.442695f;
- dst->f[2] = logf( src->f[2] ) * 1.442695f;
- dst->f[3] = logf( src->f[3] ) * 1.442695f;
-}
-
-static void
-micro_le(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_lt(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_ilt(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
- dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
- dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
- dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
-}
-
-static void
-micro_ult(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
- dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
- dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
- dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
-}
-
-static void
-micro_max(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
- dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
- dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
- dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
-}
-
-static void
-micro_imax(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
- dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
- dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
- dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
-}
-
-static void
-micro_umax(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
- dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
- dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
- dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
-}
-
-static void
-micro_min(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
- dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
- dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
- dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
-}
-
-static void
-micro_imin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
- dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
- dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
- dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
-}
-
-static void
-micro_umin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
- dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
- dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
- dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
-}
-
-static void
-micro_umod(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] % src1->u[0];
- dst->u[1] = src0->u[1] % src1->u[1];
- dst->u[2] = src0->u[2] % src1->u[2];
- dst->u[3] = src0->u[3] % src1->u[3];
-}
-
-static void
-micro_mul(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] * src1->f[0];
- dst->f[1] = src0->f[1] * src1->f[1];
- dst->f[2] = src0->f[2] * src1->f[2];
- dst->f[3] = src0->f[3] * src1->f[3];
-}
-
-static void
-micro_imul(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] * src1->i[0];
- dst->i[1] = src0->i[1] * src1->i[1];
- dst->i[2] = src0->i[2] * src1->i[2];
- dst->i[3] = src0->i[3] * src1->i[3];
-}
-
-static void
-micro_imul64(
- union tgsi_exec_channel *dst0,
- union tgsi_exec_channel *dst1,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst1->i[0] = src0->i[0] * src1->i[0];
- dst1->i[1] = src0->i[1] * src1->i[1];
- dst1->i[2] = src0->i[2] * src1->i[2];
- dst1->i[3] = src0->i[3] * src1->i[3];
- dst0->i[0] = 0;
- dst0->i[1] = 0;
- dst0->i[2] = 0;
- dst0->i[3] = 0;
-}
-
-static void
-micro_umul64(
- union tgsi_exec_channel *dst0,
- union tgsi_exec_channel *dst1,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst1->u[0] = src0->u[0] * src1->u[0];
- dst1->u[1] = src0->u[1] * src1->u[1];
- dst1->u[2] = src0->u[2] * src1->u[2];
- dst1->u[3] = src0->u[3] * src1->u[3];
- dst0->u[0] = 0;
- dst0->u[1] = 0;
- dst0->u[2] = 0;
- dst0->u[3] = 0;
-}
-
-static void
-micro_movc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2 )
-{
- dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
- dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
- dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
- dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
-}
-
-static void
-micro_neg(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = -src->f[0];
- dst->f[1] = -src->f[1];
- dst->f[2] = -src->f[2];
- dst->f[3] = -src->f[3];
-}
-
-static void
-micro_ineg(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->i[0] = -src->i[0];
- dst->i[1] = -src->i[1];
- dst->i[2] = -src->i[2];
- dst->i[3] = -src->i[3];
-}
-
-static void
-micro_not(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->u[0] = ~src->u[0];
- dst->u[1] = ~src->u[1];
- dst->u[2] = ~src->u[2];
- dst->u[3] = ~src->u[3];
-}
-
-static void
-micro_or(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] | src1->u[0];
- dst->u[1] = src0->u[1] | src1->u[1];
- dst->u[2] = src0->u[2] | src1->u[2];
- dst->u[3] = src0->u[3] | src1->u[3];
-}
-
-static void
-micro_pow(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = powf( src0->f[0], src1->f[0] );
- dst->f[1] = powf( src0->f[1], src1->f[1] );
- dst->f[2] = powf( src0->f[2], src1->f[2] );
- dst->f[3] = powf( src0->f[3], src1->f[3] );
-}
-
-static void
-micro_rnd(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = floorf( src->f[0] + 0.5f );
- dst->f[1] = floorf( src->f[1] + 0.5f );
- dst->f[2] = floorf( src->f[2] + 0.5f );
- dst->f[3] = floorf( src->f[3] + 0.5f );
-}
-
-static void
-micro_shl(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] << src1->i[0];
- dst->i[1] = src0->i[1] << src1->i[1];
- dst->i[2] = src0->i[2] << src1->i[2];
- dst->i[3] = src0->i[3] << src1->i[3];
-}
-
-static void
-micro_ishr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] >> src1->i[0];
- dst->i[1] = src0->i[1] >> src1->i[1];
- dst->i[2] = src0->i[2] >> src1->i[2];
- dst->i[3] = src0->i[3] >> src1->i[3];
-}
-
-static void
-micro_trunc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0 )
-{
- dst->f[0] = (float) (int) src0->f[0];
- dst->f[1] = (float) (int) src0->f[1];
- dst->f[2] = (float) (int) src0->f[2];
- dst->f[3] = (float) (int) src0->f[3];
-}
-
-static void
-micro_ushr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] >> src1->u[0];
- dst->u[1] = src0->u[1] >> src1->u[1];
- dst->u[2] = src0->u[2] >> src1->u[2];
- dst->u[3] = src0->u[3] >> src1->u[3];
-}
-
-static void
-micro_sin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = sinf( src->f[0] );
- dst->f[1] = sinf( src->f[1] );
- dst->f[2] = sinf( src->f[2] );
- dst->f[3] = sinf( src->f[3] );
-}
-
-static void
-micro_sqrt( union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = sqrtf( src->f[0] );
- dst->f[1] = sqrtf( src->f[1] );
- dst->f[2] = sqrtf( src->f[2] );
- dst->f[3] = sqrtf( src->f[3] );
-}
-
-static void
-micro_sub(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] - src1->f[0];
- dst->f[1] = src0->f[1] - src1->f[1];
- dst->f[2] = src0->f[2] - src1->f[2];
- dst->f[3] = src0->f[3] - src1->f[3];
-}
-
-static void
-micro_u2f(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) src->u[0];
- dst->f[1] = (float) src->u[1];
- dst->f[2] = (float) src->u[2];
- dst->f[3] = (float) src->u[3];
-}
-
-static void
-micro_xor(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] ^ src1->u[0];
- dst->u[1] = src0->u[1] ^ src1->u[1];
- dst->u[2] = src0->u[2] ^ src1->u[2];
- dst->u[3] = src0->u[3] ^ src1->u[3];
-}
-
-static void
-fetch_src_file_channel(
- const struct tgsi_exec_machine *mach,
- const uint file,
- const uint swizzle,
- const union tgsi_exec_channel *index,
- union tgsi_exec_channel *chan )
-{
- switch( swizzle ) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
- switch( file ) {
- case TGSI_FILE_CONSTANT:
- chan->f[0] = mach->Consts[index->i[0]][swizzle];
- chan->f[1] = mach->Consts[index->i[1]][swizzle];
- chan->f[2] = mach->Consts[index->i[2]][swizzle];
- chan->f[3] = mach->Consts[index->i[3]][swizzle];
- break;
-
- case TGSI_FILE_INPUT:
- chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- case TGSI_FILE_TEMPORARY:
- assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
- chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- case TGSI_FILE_IMMEDIATE:
- assert( index->i[0] < (int) mach->ImmLimit );
- chan->f[0] = mach->Imms[index->i[0]][swizzle];
- assert( index->i[1] < (int) mach->ImmLimit );
- chan->f[1] = mach->Imms[index->i[1]][swizzle];
- assert( index->i[2] < (int) mach->ImmLimit );
- chan->f[2] = mach->Imms[index->i[2]][swizzle];
- assert( index->i[3] < (int) mach->ImmLimit );
- chan->f[3] = mach->Imms[index->i[3]][swizzle];
- break;
-
- case TGSI_FILE_ADDRESS:
- chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- case TGSI_FILE_OUTPUT:
- /* vertex/fragment output vars can be read too */
- chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- default:
- assert( 0 );
- }
- break;
-
- case TGSI_EXTSWIZZLE_ZERO:
- *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
- break;
-
- default:
- assert( 0 );
- }
-}
-
-static void
-fetch_source(
- const struct tgsi_exec_machine *mach,
- union tgsi_exec_channel *chan,
- const struct tgsi_full_src_register *reg,
- const uint chan_index )
-{
- union tgsi_exec_channel index;
- uint swizzle;
-
- index.i[0] =
- index.i[1] =
- index.i[2] =
- index.i[3] = reg->SrcRegister.Index;
-
- if (reg->SrcRegister.Indirect) {
- union tgsi_exec_channel index2;
- union tgsi_exec_channel indir_index;
-
- index2.i[0] =
- index2.i[1] =
- index2.i[2] =
- index2.i[3] = reg->SrcRegisterInd.Index;
-
- swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
- fetch_src_file_channel(
- mach,
- reg->SrcRegisterInd.File,
- swizzle,
- &index2,
- &indir_index );
-
- index.i[0] += indir_index.i[0];
- index.i[1] += indir_index.i[1];
- index.i[2] += indir_index.i[2];
- index.i[3] += indir_index.i[3];
- }
-
- if( reg->SrcRegister.Dimension ) {
- switch( reg->SrcRegister.File ) {
- case TGSI_FILE_INPUT:
- index.i[0] *= 17;
- index.i[1] *= 17;
- index.i[2] *= 17;
- index.i[3] *= 17;
- break;
- case TGSI_FILE_CONSTANT:
- index.i[0] *= 4096;
- index.i[1] *= 4096;
- index.i[2] *= 4096;
- index.i[3] *= 4096;
- break;
- default:
- assert( 0 );
- }
-
- index.i[0] += reg->SrcRegisterDim.Index;
- index.i[1] += reg->SrcRegisterDim.Index;
- index.i[2] += reg->SrcRegisterDim.Index;
- index.i[3] += reg->SrcRegisterDim.Index;
-
- if (reg->SrcRegisterDim.Indirect) {
- union tgsi_exec_channel index2;
- union tgsi_exec_channel indir_index;
-
- index2.i[0] =
- index2.i[1] =
- index2.i[2] =
- index2.i[3] = reg->SrcRegisterDimInd.Index;
-
- swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
- fetch_src_file_channel(
- mach,
- reg->SrcRegisterDimInd.File,
- swizzle,
- &index2,
- &indir_index );
-
- index.i[0] += indir_index.i[0];
- index.i[1] += indir_index.i[1];
- index.i[2] += indir_index.i[2];
- index.i[3] += indir_index.i[3];
- }
- }
-
- swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
- fetch_src_file_channel(
- mach,
- reg->SrcRegister.File,
- swizzle,
- &index,
- chan );
-
- switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
- case TGSI_UTIL_SIGN_CLEAR:
- micro_abs( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_SET:
- micro_abs( chan, chan );
- micro_neg( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- micro_neg( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_KEEP:
- break;
- }
-
- if (reg->SrcRegisterExtMod.Complement) {
- micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
- }
-}
-
-static void
-store_dest(
- struct tgsi_exec_machine *mach,
- const union tgsi_exec_channel *chan,
- const struct tgsi_full_dst_register *reg,
- const struct tgsi_full_instruction *inst,
- uint chan_index )
-{
- union tgsi_exec_channel *dst;
-
- switch( reg->DstRegister.File ) {
- case TGSI_FILE_NULL:
- return;
-
- case TGSI_FILE_OUTPUT:
- dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
- + reg->DstRegister.Index].xyzw[chan_index];
- break;
-
- case TGSI_FILE_TEMPORARY:
- assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS);
- dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
- break;
-
- case TGSI_FILE_ADDRESS:
- dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
- break;
-
- default:
- assert( 0 );
- return;
- }
-
- switch (inst->Instruction.Saturate)
- {
- case TGSI_SAT_NONE:
- if (mach->ExecMask & 0x1)
- dst->i[0] = chan->i[0];
- if (mach->ExecMask & 0x2)
- dst->i[1] = chan->i[1];
- if (mach->ExecMask & 0x4)
- dst->i[2] = chan->i[2];
- if (mach->ExecMask & 0x8)
- dst->i[3] = chan->i[3];
- break;
-
- case TGSI_SAT_ZERO_ONE:
- /* XXX need to obey ExecMask here */
- micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
- break;
-
- case TGSI_SAT_MINUS_PLUS_ONE:
- assert( 0 );
- break;
-
- default:
- assert( 0 );
- }
-}
-
-#define FETCH(VAL,INDEX,CHAN)\
- fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
-
-#define STORE(VAL,INDEX,CHAN)\
- store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
-
-
-/**
- * Execute ARB-style KIL which is predicated by a src register.
- * Kill fragment if any of the four values is less than zero.
- */
-static void
-exec_kilp(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- uint uniquemask;
- uint chan_index;
- uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
- union tgsi_exec_channel r[1];
-
- /* This mask stores component bits that were already tested. Note that
- * we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
- uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
-
- for (chan_index = 0; chan_index < 4; chan_index++)
- {
- uint swizzle;
- uint i;
-
- /* unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle (
- &inst->FullSrcRegisters[0],
- chan_index);
-
- /* check if the component has not been already tested */
- if (uniquemask & (1 << swizzle))
- continue;
- uniquemask |= 1 << swizzle;
-
- FETCH(&r[0], 0, chan_index);
- for (i = 0; i < 4; i++)
- if (r[0].f[i] < 0.0f)
- kilmask |= 1 << i;
- }
-
- mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
-}
-
-
-/*
- * Fetch a texel using STR texture coordinates.
- */
-static void
-fetch_texel( struct tgsi_sampler *sampler,
- const union tgsi_exec_channel *s,
- const union tgsi_exec_channel *t,
- const union tgsi_exec_channel *p,
- float lodbias, /* XXX should be float[4] */
- union tgsi_exec_channel *r,
- union tgsi_exec_channel *g,
- union tgsi_exec_channel *b,
- union tgsi_exec_channel *a )
-{
- uint j;
- float rgba[NUM_CHANNELS][QUAD_SIZE];
-
- sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
-
- for (j = 0; j < 4; j++) {
- r->f[j] = rgba[0][j];
- g->f[j] = rgba[1][j];
- b->f[j] = rgba[2][j];
- a->f[j] = rgba[3][j];
- }
-}
-
-
-static void
-exec_tex(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst,
- boolean biasLod,
- boolean projected)
-{
- const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
- union tgsi_exec_channel r[8];
- uint chan_index;
- float lodBias;
-
- /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
-
- switch (inst->InstructionExtTexture.Texture) {
- case TGSI_TEXTURE_1D:
-
- FETCH(&r[0], 0, CHAN_X);
-
- if (projected) {
- FETCH(&r[1], 0, CHAN_W);
- micro_div( &r[0], &r[0], &r[1] );
- }
-
- if (biasLod) {
- FETCH(&r[1], 0, CHAN_W);
- lodBias = r[2].f[0];
- }
- else
- lodBias = 0.0;
-
- fetch_texel(&mach->Samplers[unit],
- &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
- &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
- break;
-
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_RECT:
-
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 0, CHAN_Z);
-
- if (projected) {
- FETCH(&r[3], 0, CHAN_W);
- micro_div( &r[0], &r[0], &r[3] );
- micro_div( &r[1], &r[1], &r[3] );
- micro_div( &r[2], &r[2], &r[3] );
- }
-
- if (biasLod) {
- FETCH(&r[3], 0, CHAN_W);
- lodBias = r[3].f[0];
- }
- else
- lodBias = 0.0;
-
- fetch_texel(&mach->Samplers[unit],
- &r[0], &r[1], &r[2], lodBias, /* inputs */
- &r[0], &r[1], &r[2], &r[3]); /* outputs */
- break;
-
- case TGSI_TEXTURE_3D:
- case TGSI_TEXTURE_CUBE:
-
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 0, CHAN_Z);
-
- if (projected) {
- FETCH(&r[3], 0, CHAN_W);
- micro_div( &r[0], &r[0], &r[3] );
- micro_div( &r[1], &r[1], &r[3] );
- micro_div( &r[2], &r[2], &r[3] );
- }
-
- if (biasLod) {
- FETCH(&r[3], 0, CHAN_W);
- lodBias = r[3].f[0];
- }
- else
- lodBias = 0.0;
-
- fetch_texel(&mach->Samplers[unit],
- &r[0], &r[1], &r[2], lodBias,
- &r[0], &r[1], &r[2], &r[3]);
- break;
-
- default:
- assert (0);
- }
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[chan_index], 0, chan_index );
- }
-}
-
-
-/**
- * Evaluate a constant-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_constant_coef(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- unsigned i;
-
- for( i = 0; i < QUAD_SIZE; i++ ) {
- mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
- }
-}
-
-/**
- * Evaluate a linear-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_linear_coef(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- const float x = mach->QuadPos.xyzw[0].f[0];
- const float y = mach->QuadPos.xyzw[1].f[0];
- const float dadx = mach->InterpCoefs[attrib].dadx[chan];
- const float dady = mach->InterpCoefs[attrib].dady[chan];
- const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
- mach->Inputs[attrib].xyzw[chan].f[0] = a0;
- mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
- mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
- mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
-}
-
-/**
- * Evaluate a perspective-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_perspective_coef(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- const float x = mach->QuadPos.xyzw[0].f[0];
- const float y = mach->QuadPos.xyzw[1].f[0];
- const float dadx = mach->InterpCoefs[attrib].dadx[chan];
- const float dady = mach->InterpCoefs[attrib].dady[chan];
- const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
- const float *w = mach->QuadPos.xyzw[3].f;
- /* divide by W here */
- mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
- mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
- mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
- mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
-}
-
-
-typedef void (* eval_coef_func)(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan );
-
-static void
-exec_declaration(
- struct tgsi_exec_machine *mach,
- const struct tgsi_full_declaration *decl )
-{
- if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- eval_coef_func eval;
-
- first = decl->DeclarationRange.First;
- last = decl->DeclarationRange.Last;
- mask = decl->Declaration.UsageMask;
-
- switch( decl->Declaration.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- eval = eval_constant_coef;
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- eval = eval_linear_coef;
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- eval = eval_perspective_coef;
- break;
-
- default:
- assert( 0 );
- }
-
- if( mask == TGSI_WRITEMASK_XYZW ) {
- unsigned i, j;
-
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- eval( mach, i, j );
- }
- }
- }
- else {
- unsigned i, j;
-
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- for( i = first; i <= last; i++ ) {
- eval( mach, i, j );
- }
- }
- }
- }
- }
- }
-}
-
-static void
-exec_instruction(
- struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst,
- int *pc )
-{
- uint chan_index;
- union tgsi_exec_channel r[8];
-
- (*pc)++;
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ARL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_f2it( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LIT:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_X );
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Y );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[1], 0, CHAN_Y );
- micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-
- FETCH( &r[2], 0, CHAN_W );
- micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
- micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
- micro_pow( &r[1], &r[1], &r[2] );
- micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Z );
- }
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
- FETCH( &r[0], 0, CHAN_X );
- micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
- FETCH( &r[0], 0, CHAN_X );
- micro_sqrt( &r[0], &r[0] );
- micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_EXP:
- FETCH( &r[0], 0, CHAN_X );
- micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
- STORE( &r[2], 0, CHAN_X ); /* store r2 */
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
- STORE( &r[2], 0, CHAN_Y ); /* store r2 */
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
- STORE( &r[2], 0, CHAN_Z ); /* store r2 */
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_LOG:
- FETCH( &r[0], 0, CHAN_X );
- micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
- micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
- micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &r[0], 0, CHAN_X );
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
- micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
- STORE( &r[0], 0, CHAN_Y );
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( &r[1], 0, CHAN_Z );
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MUL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
- {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_ADD:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP3:
- /* TGSI_OPCODE_DOT3 */
- FETCH( &r[0], 0, CHAN_X );
- FETCH( &r[1], 1, CHAN_X );
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Y );
- FETCH( &r[2], 1, CHAN_Y );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Z );
- FETCH( &r[2], 1, CHAN_Z );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP4:
- /* TGSI_OPCODE_DOT4 */
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 1, CHAN_Y);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Z);
- FETCH(&r[2], 1, CHAN_Z);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_W);
- FETCH(&r[2], 1, CHAN_W);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DST:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- FETCH( &r[0], 0, CHAN_Y );
- FETCH( &r[1], 1, CHAN_Y);
- micro_mul( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, CHAN_Y );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_Z );
- STORE( &r[0], 0, CHAN_Z );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- FETCH( &r[0], 1, CHAN_W );
- STORE( &r[0], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MIN:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- /* XXX use micro_min()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_MAX:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- /* XXX use micro_max()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
-
- STORE(&r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_mul( &r[0], &r[0], &r[1] );
- FETCH( &r[1], 2, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SUB:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- micro_sub( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
-
- micro_sub( &r[1], &r[1], &r[2] );
- micro_mul( &r[0], &r[0], &r[1] );
- micro_add( &r[0], &r[0], &r[2] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_CND:
- assert (0);
- break;
-
- case TGSI_OPCODE_CND0:
- assert (0);
- break;
-
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
- assert (0);
- break;
-
- case TGSI_OPCODE_INDEX:
- assert (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- assert (0);
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_frc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CLAMP:
- assert (0);
- break;
-
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_flr( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_ROUND:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_rnd( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
- FETCH(&r[0], 0, CHAN_X);
-
- micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
- FETCH( &r[0], 0, CHAN_X );
- micro_lg2( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- micro_pow( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
- FETCH(&r[0], 0, CHAN_Y);
- FETCH(&r[1], 1, CHAN_Z);
-
- micro_mul( &r[2], &r[0], &r[1] );
-
- FETCH(&r[3], 0, CHAN_Z);
- FETCH(&r[4], 1, CHAN_Y);
-
- micro_mul( &r[5], &r[3], &r[4] );
- micro_sub( &r[2], &r[2], &r[5] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &r[2], 0, CHAN_X );
- }
-
- FETCH(&r[2], 1, CHAN_X);
-
- micro_mul( &r[3], &r[3], &r[2] );
-
- FETCH(&r[5], 0, CHAN_X);
-
- micro_mul( &r[1], &r[1], &r[5] );
- micro_sub( &r[3], &r[3], &r[1] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- STORE( &r[3], 0, CHAN_Y );
- }
-
- micro_mul( &r[5], &r[5], &r[4] );
- micro_mul( &r[0], &r[0], &r[2] );
- micro_sub( &r[5], &r[5], &r[0] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( &r[5], 0, CHAN_Z );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MULTIPLYMATRIX:
- assert (0);
- break;
-
- case TGSI_OPCODE_ABS:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
-
- micro_abs( &r[0], &r[0] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_RCC:
- assert (0);
- break;
-
- case TGSI_OPCODE_DPH:
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 1, CHAN_Y);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Z);
- FETCH(&r[2], 1, CHAN_Z);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 1, CHAN_W);
-
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_COS:
- FETCH(&r[0], 0, CHAN_X);
-
- micro_cos( &r[0], &r[0] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DDX:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ddx( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DDY:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ddy( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_KILP:
- exec_kilp (mach, inst);
- break;
-
- case TGSI_OPCODE_KIL:
- /* for enabled ExecMask bits, set the killed bit */
- mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
- break;
-
- case TGSI_OPCODE_PK2H:
- assert (0);
- break;
-
- case TGSI_OPCODE_PK2US:
- assert (0);
- break;
-
- case TGSI_OPCODE_PK4B:
- assert (0);
- break;
-
- case TGSI_OPCODE_PK4UB:
- assert (0);
- break;
-
- case TGSI_OPCODE_RFL:
- assert (0);
- break;
-
- case TGSI_OPCODE_SEQ:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
- &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SFL:
- assert (0);
- break;
-
- case TGSI_OPCODE_SGT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SIN:
- FETCH( &r[0], 0, CHAN_X );
- micro_sin( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SNE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_STR:
- assert (0);
- break;
-
- case TGSI_OPCODE_TEX:
- /* simple texture lookup */
- /* src[0] = texcoord */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, FALSE, FALSE);
- break;
-
- case TGSI_OPCODE_TXB:
- /* Texture lookup with lod bias */
- /* src[0] = texcoord (src[0].w = LOD bias) */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE, FALSE);
- break;
-
- case TGSI_OPCODE_TXD:
- /* Texture lookup with explict partial derivatives */
- /* src[0] = texcoord */
- /* src[1] = d[strq]/dx */
- /* src[2] = d[strq]/dy */
- /* src[3] = sampler unit */
- assert (0);
- break;
-
- case TGSI_OPCODE_TXL:
- /* Texture lookup with explit LOD */
- /* src[0] = texcoord (src[0].w = LOD) */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE, FALSE);
- break;
-
- case TGSI_OPCODE_TXP:
- /* Texture lookup with projection */
- /* src[0] = texcoord (src[0].w = projection) */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, FALSE, TRUE);
- break;
-
- case TGSI_OPCODE_UP2H:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP2US:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP4B:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP4UB:
- assert (0);
- break;
-
- case TGSI_OPCODE_X2D:
- assert (0);
- break;
-
- case TGSI_OPCODE_ARA:
- assert (0);
- break;
-
- case TGSI_OPCODE_ARR:
- assert (0);
- break;
-
- case TGSI_OPCODE_BRA:
- assert (0);
- break;
-
- case TGSI_OPCODE_CAL:
- /* skip the call if no execution channels are enabled */
- if (mach->ExecMask) {
- /* do the call */
-
- /* push the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
- mach->CondStack[mach->CondStackTop++] = mach->CondMask;
- assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
-
- assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
- mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
-
- /* note that PC was already incremented above */
- mach->CallStack[mach->CallStackTop++] = *pc;
- *pc = inst->InstructionExtLabel.Label;
- }
- break;
-
- case TGSI_OPCODE_RET:
- mach->FuncMask &= ~mach->ExecMask;
- UPDATE_EXEC_MASK(mach);
-
- if (mach->ExecMask == 0x0) {
- /* really return now (otherwise, keep executing */
-
- if (mach->CallStackTop == 0) {
- /* returning from main() */
- *pc = -1;
- return;
- }
- *pc = mach->CallStack[--mach->CallStackTop];
-
- /* pop the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop > 0);
- mach->CondMask = mach->CondStack[--mach->CondStackTop];
- assert(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
- assert(mach->FuncStackTop > 0);
- mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
-
- UPDATE_EXEC_MASK(mach);
- }
- break;
-
- case TGSI_OPCODE_SSG:
- assert (0);
- break;
-
- case TGSI_OPCODE_CMP:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
-
- micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_SCS:
- if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( &r[0], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
- micro_cos( &r[1], &r[0] );
- STORE( &r[1], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- micro_sin( &r[1], &r[0] );
- STORE( &r[1], 0, CHAN_Y );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_NRM:
- assert (0);
- break;
-
- case TGSI_OPCODE_DIV:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_DP2:
- FETCH( &r[0], 0, CHAN_X );
- FETCH( &r[1], 1, CHAN_X );
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Y );
- FETCH( &r[2], 1, CHAN_Y );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_IF:
- /* push CondMask */
- assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
- mach->CondStack[mach->CondStackTop++] = mach->CondMask;
- FETCH( &r[0], 0, CHAN_X );
- /* update CondMask */
- if( ! r[0].u[0] ) {
- mach->CondMask &= ~0x1;
- }
- if( ! r[0].u[1] ) {
- mach->CondMask &= ~0x2;
- }
- if( ! r[0].u[2] ) {
- mach->CondMask &= ~0x4;
- }
- if( ! r[0].u[3] ) {
- mach->CondMask &= ~0x8;
- }
- UPDATE_EXEC_MASK(mach);
- /* Todo: If CondMask==0, jump to ELSE */
- break;
-
- case TGSI_OPCODE_ELSE:
- /* invert CondMask wrt previous mask */
- {
- uint prevMask;
- assert(mach->CondStackTop > 0);
- prevMask = mach->CondStack[mach->CondStackTop - 1];
- mach->CondMask = ~mach->CondMask & prevMask;
- UPDATE_EXEC_MASK(mach);
- /* Todo: If CondMask==0, jump to ENDIF */
- }
- break;
-
- case TGSI_OPCODE_ENDIF:
- /* pop CondMask */
- assert(mach->CondStackTop > 0);
- mach->CondMask = mach->CondStack[--mach->CondStackTop];
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_END:
- /* halt execution */
- *pc = -1;
- break;
-
- case TGSI_OPCODE_REP:
- assert (0);
- break;
-
- case TGSI_OPCODE_ENDREP:
- assert (0);
- break;
-
- case TGSI_OPCODE_PUSHA:
- assert (0);
- break;
-
- case TGSI_OPCODE_POPA:
- assert (0);
- break;
-
- case TGSI_OPCODE_CEIL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ceil( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_I2F:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_i2f( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_NOT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_not( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_TRUNC:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_trunc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SHL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_shl( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SHR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_ishr( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_AND:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_and( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_OR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_or( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MOD:
- assert (0);
- break;
-
- case TGSI_OPCODE_XOR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_xor( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SAD:
- assert (0);
- break;
-
- case TGSI_OPCODE_TXF:
- assert (0);
- break;
-
- case TGSI_OPCODE_TXQ:
- assert (0);
- break;
-
- case TGSI_OPCODE_EMIT:
- mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
- break;
-
- case TGSI_OPCODE_ENDPRIM:
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
- break;
-
- case TGSI_OPCODE_LOOP:
- /* fall-through (for now) */
- case TGSI_OPCODE_BGNLOOP2:
- /* push LoopMask and ContMasks */
- assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
- break;
-
- case TGSI_OPCODE_ENDLOOP:
- /* fall-through (for now at least) */
- case TGSI_OPCODE_ENDLOOP2:
- /* Restore ContMask, but don't pop */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
- UPDATE_EXEC_MASK(mach);
- if (mach->ExecMask) {
- /* repeat loop: jump to instruction just past BGNLOOP */
- *pc = inst->InstructionExtLabel.Label + 1;
- }
- else {
- /* exit loop: pop LoopMask */
- assert(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- /* pop ContMask */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
- }
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_BRK:
- /* turn off loop channels for each enabled exec channel */
- mach->LoopMask &= ~mach->ExecMask;
- /* Todo: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_CONT:
- /* turn off cont channels for each enabled exec channel */
- mach->ContMask &= ~mach->ExecMask;
- /* Todo: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_BGNSUB:
- /* no-op */
- break;
-
- case TGSI_OPCODE_ENDSUB:
- /* no-op */
- break;
-
- case TGSI_OPCODE_NOISE1:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOISE2:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOISE3:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOISE4:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOP:
- break;
-
- default:
- assert( 0 );
- }
-}
-
-
-/**
- * Run TGSI interpreter.
- * \return bitmask of "alive" quad components
- */
-uint
-tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
-{
- uint i;
- int pc = 0;
-
- mach->CondMask = 0xf;
- mach->LoopMask = 0xf;
- mach->ContMask = 0xf;
- mach->FuncMask = 0xf;
- mach->ExecMask = 0xf;
-
- mach->CondStackTop = 0; /* temporarily subvert this assertion */
- assert(mach->CondStackTop == 0);
- assert(mach->LoopStackTop == 0);
- assert(mach->ContStackTop == 0);
- assert(mach->CallStackTop == 0);
-
- mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
- mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
-
- if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
- mach->Primitives[0] = 0;
- }
-
-
- /* execute declarations (interpolants) */
- for (i = 0; i < mach->NumDeclarations; i++) {
- exec_declaration( mach, mach->Declarations+i );
- }
-
- /* execute instructions, until pc is set to -1 */
- while (pc != -1) {
- assert(pc < (int) mach->NumInstructions);
- exec_instruction( mach, mach->Instructions + pc, &pc );
- }
-
-#if 0
- /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
- if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
- /*
- * Scale back depth component.
- */
- for (i = 0; i < 4; i++)
- mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
- }
-#endif
-
- return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
-}
-
-
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
deleted file mode 100644
index 4f30650b07..0000000000
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ /dev/null
@@ -1,253 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#if !defined TGSI_EXEC_H
-#define TGSI_EXEC_H
-
-#include "pipe/p_compiler.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-#define MAX_LABELS 1024
-
-#define NUM_CHANNELS 4 /* R,G,B,A */
-#define QUAD_SIZE 4 /* 4 pixel/quad */
-
-/**
- * Registers may be treated as float, signed int or unsigned int.
- */
-union tgsi_exec_channel
-{
- float f[QUAD_SIZE];
- int i[QUAD_SIZE];
- unsigned u[QUAD_SIZE];
-};
-
-/**
- * A vector[RGBA] of channels[4 pixels]
- */
-struct tgsi_exec_vector
-{
- union tgsi_exec_channel xyzw[NUM_CHANNELS];
-};
-
-/**
- * For fragment programs, information for computing fragment input
- * values from plane equation of the triangle/line.
- */
-struct tgsi_interp_coef
-{
- float a0[NUM_CHANNELS]; /* in an xyzw layout */
- float dadx[NUM_CHANNELS];
- float dady[NUM_CHANNELS];
-};
-
-
-struct softpipe_tile_cache; /**< Opaque to TGSI */
-
-/**
- * Information for sampling textures, which must be implemented
- * by code outside the TGSI executor.
- */
-struct tgsi_sampler
-{
- const struct pipe_sampler_state *state;
- struct pipe_texture *texture;
- /** Get samples for four fragments in a quad */
- void (*get_samples)(struct tgsi_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE]);
- void *pipe; /*XXX temporary*/
- struct softpipe_tile_cache *cache;
-};
-
-/**
- * For branching/calling subroutines.
- */
-struct tgsi_exec_labels
-{
- unsigned labels[MAX_LABELS][2];
- unsigned count;
-};
-
-
-#define TGSI_EXEC_NUM_TEMPS 128
-#define TGSI_EXEC_NUM_TEMP_EXTRAS 6
-#define TGSI_EXEC_NUM_IMMEDIATES 256
-
-/*
- * Locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TGSI_EXEC_TEMP_00000000_I (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_00000000_C 0
-
-#define TGSI_EXEC_TEMP_7FFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_7FFFFFFF_C 1
-
-#define TGSI_EXEC_TEMP_80000000_I (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_80000000_C 2
-
-#define TGSI_EXEC_TEMP_FFFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_FFFFFFFF_C 3
-
-#define TGSI_EXEC_TEMP_ONE_I (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_ONE_C 0
-
-#define TGSI_EXEC_TEMP_TWO_I (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_TWO_C 1
-
-#define TGSI_EXEC_TEMP_128_I (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_128_C 2
-
-#define TGSI_EXEC_TEMP_MINUS_128_I (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_MINUS_128_C 3
-
-#define TGSI_EXEC_TEMP_KILMASK_I (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_KILMASK_C 0
-
-#define TGSI_EXEC_TEMP_OUTPUT_I (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_OUTPUT_C 1
-
-#define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_PRIMITIVE_C 2
-
-#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_THREE_C 3
-
-#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_HALF_C 0
-
-#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
-
-#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5)
-
-
-#define TGSI_EXEC_MAX_COND_NESTING 20
-#define TGSI_EXEC_MAX_LOOP_NESTING 20
-#define TGSI_EXEC_MAX_CALL_NESTING 20
-
-/**
- * Run-time virtual machine state for executing TGSI shader.
- */
-struct tgsi_exec_machine
-{
- /* Total = program temporaries + internal temporaries
- * + 1 padding to align to 16 bytes
- */
- struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS +
- TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
-
- /*
- * This will point to _Temps after aligning to 16B boundary.
- */
- struct tgsi_exec_vector *Temps;
- struct tgsi_exec_vector *Addrs;
-
- struct tgsi_sampler *Samplers;
-
- float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
- unsigned ImmLimit;
- const float (*Consts)[4];
- struct tgsi_exec_vector *Inputs;
- struct tgsi_exec_vector *Outputs;
- const struct tgsi_token *Tokens;
- unsigned Processor;
-
- /* GEOMETRY processor only. */
- unsigned *Primitives;
-
- /* FRAGMENT processor only. */
- const struct tgsi_interp_coef *InterpCoefs;
- struct tgsi_exec_vector QuadPos;
-
- /* Conditional execution masks */
- uint CondMask; /**< For IF/ELSE/ENDIF */
- uint LoopMask; /**< For BGNLOOP/ENDLOOP */
- uint ContMask; /**< For loop CONT statements */
- uint FuncMask; /**< For function calls */
- uint ExecMask; /**< = CondMask & LoopMask */
-
- /** Condition mask stack (for nested conditionals) */
- uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
- int CondStackTop;
-
- /** Loop mask stack (for nested loops) */
- uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
- int LoopStackTop;
-
- /** Loop continue mask stack (see comments in tgsi_exec.c) */
- uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
- int ContStackTop;
-
- /** Function execution mask stack (for executing subroutine code) */
- uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
- int FuncStackTop;
-
- /** Function call stack for saving/restoring the program counter */
- uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
- int CallStackTop;
-
- struct tgsi_full_instruction *Instructions;
- uint NumInstructions;
-
- struct tgsi_full_declaration *Declarations;
- uint NumDeclarations;
-
- struct tgsi_exec_labels Labels;
-};
-
-void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach );
-
-
-void
-tgsi_exec_machine_bind_shader(
- struct tgsi_exec_machine *mach,
- const struct tgsi_token *tokens,
- uint numSamplers,
- struct tgsi_sampler *samplers);
-
-uint
-tgsi_exec_machine_run(
- struct tgsi_exec_machine *mach );
-
-
-void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
-
-
-#if defined __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* TGSI_EXEC_H */
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
deleted file mode 100755
index cdbdf5c882..0000000000
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ /dev/null
@@ -1,2275 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi_exec.h"
-#include "tgsi_sse2.h"
-
-#include "rtasm/rtasm_x86sse.h"
-
-#ifdef PIPE_ARCH_X86
-
-/* for 1/sqrt()
- *
- * This costs about 100fps (close to 10%) in gears:
- */
-#define HIGH_PRECISION 1
-
-
-#define FOR_EACH_CHANNEL( CHAN )\
- for( CHAN = 0; CHAN < 4; CHAN++ )
-
-#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
- ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
-
-#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
- if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
-
-#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
- FOR_EACH_CHANNEL( CHAN )\
- IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
-
-#define CHAN_X 0
-#define CHAN_Y 1
-#define CHAN_Z 2
-#define CHAN_W 3
-
-#define TEMP_R0 TGSI_EXEC_TEMP_R0
-
-/**
- * X86 utility functions.
- */
-
-static struct x86_reg
-make_xmm(
- unsigned xmm )
-{
- return x86_make_reg(
- file_XMM,
- (enum x86_reg_name) xmm );
-}
-
-/**
- * X86 register mapping helpers.
- */
-
-static struct x86_reg
-get_const_base( void )
-{
- return x86_make_reg(
- file_REG32,
- reg_CX );
-}
-
-static struct x86_reg
-get_input_base( void )
-{
- return x86_make_reg(
- file_REG32,
- reg_AX );
-}
-
-static struct x86_reg
-get_output_base( void )
-{
- return x86_make_reg(
- file_REG32,
- reg_DX );
-}
-
-static struct x86_reg
-get_temp_base( void )
-{
- return x86_make_reg(
- file_REG32,
- reg_BX );
-}
-
-static struct x86_reg
-get_coef_base( void )
-{
- return get_output_base();
-}
-
-static struct x86_reg
-get_immediate_base( void )
-{
- return x86_make_reg(
- file_REG32,
- reg_DI );
-}
-
-
-/**
- * Data access helpers.
- */
-
-
-static struct x86_reg
-get_immediate(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_immediate_base(),
- (vec * 4 + chan) * 4 );
-}
-
-static struct x86_reg
-get_const(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_const_base(),
- (vec * 4 + chan) * 4 );
-}
-
-static struct x86_reg
-get_input(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_input_base(),
- (vec * 4 + chan) * 16 );
-}
-
-static struct x86_reg
-get_output(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_output_base(),
- (vec * 4 + chan) * 16 );
-}
-
-static struct x86_reg
-get_temp(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_temp_base(),
- (vec * 4 + chan) * 16 );
-}
-
-static struct x86_reg
-get_coef(
- unsigned vec,
- unsigned chan,
- unsigned member )
-{
- return x86_make_disp(
- get_coef_base(),
- ((vec * 3 + member) * 4 + chan) * 4 );
-}
-
-
-static void
-emit_ret(
- struct x86_function *func )
-{
- x86_ret( func );
-}
-
-
-/**
- * Data fetch helpers.
- */
-
-/**
- * Copy a shader constant to xmm register
- * \param xmm the destination xmm register
- * \param vec the src const buffer index
- * \param chan src channel to fetch (X, Y, Z or W)
- */
-static void
-emit_const(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movss(
- func,
- make_xmm( xmm ),
- get_const( vec, chan ) );
- sse_shufps(
- func,
- make_xmm( xmm ),
- make_xmm( xmm ),
- SHUF( 0, 0, 0, 0 ) );
-}
-
-static void
-emit_immediate(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movss(
- func,
- make_xmm( xmm ),
- get_immediate( vec, chan ) );
- sse_shufps(
- func,
- make_xmm( xmm ),
- make_xmm( xmm ),
- SHUF( 0, 0, 0, 0 ) );
-}
-
-
-/**
- * Copy a shader input to xmm register
- * \param xmm the destination xmm register
- * \param vec the src input attrib
- * \param chan src channel to fetch (X, Y, Z or W)
- */
-static void
-emit_inputf(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movups(
- func,
- make_xmm( xmm ),
- get_input( vec, chan ) );
-}
-
-/**
- * Store an xmm register to a shader output
- * \param xmm the source xmm register
- * \param vec the dest output attrib
- * \param chan src dest channel to store (X, Y, Z or W)
- */
-static void
-emit_output(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movups(
- func,
- get_output( vec, chan ),
- make_xmm( xmm ) );
-}
-
-/**
- * Copy a shader temporary to xmm register
- * \param xmm the destination xmm register
- * \param vec the src temp register
- * \param chan src channel to fetch (X, Y, Z or W)
- */
-static void
-emit_tempf(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movaps(
- func,
- make_xmm( xmm ),
- get_temp( vec, chan ) );
-}
-
-/**
- * Load an xmm register with an input attrib coefficient (a0, dadx or dady)
- * \param xmm the destination xmm register
- * \param vec the src input/attribute coefficient index
- * \param chan src channel to fetch (X, Y, Z or W)
- * \param member 0=a0, 1=dadx, 2=dady
- */
-static void
-emit_coef(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan,
- unsigned member )
-{
- sse_movss(
- func,
- make_xmm( xmm ),
- get_coef( vec, chan, member ) );
- sse_shufps(
- func,
- make_xmm( xmm ),
- make_xmm( xmm ),
- SHUF( 0, 0, 0, 0 ) );
-}
-
-/**
- * Data store helpers.
- */
-
-static void
-emit_inputs(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movups(
- func,
- get_input( vec, chan ),
- make_xmm( xmm ) );
-}
-
-static void
-emit_temps(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movaps(
- func,
- get_temp( vec, chan ),
- make_xmm( xmm ) );
-}
-
-static void
-emit_addrs(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_temps(
- func,
- xmm,
- vec + TGSI_EXEC_NUM_TEMPS,
- chan );
-}
-
-/**
- * Coefficent fetch helpers.
- */
-
-static void
-emit_coef_a0(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_coef(
- func,
- xmm,
- vec,
- chan,
- 0 );
-}
-
-static void
-emit_coef_dadx(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_coef(
- func,
- xmm,
- vec,
- chan,
- 1 );
-}
-
-static void
-emit_coef_dady(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_coef(
- func,
- xmm,
- vec,
- chan,
- 2 );
-}
-
-/**
- * Function call helpers.
- */
-
-static void
-emit_push_gp(
- struct x86_function *func )
-{
- x86_push(
- func,
- x86_make_reg( file_REG32, reg_AX) );
- x86_push(
- func,
- x86_make_reg( file_REG32, reg_CX) );
- x86_push(
- func,
- x86_make_reg( file_REG32, reg_DX) );
-}
-
-static void
-x86_pop_gp(
- struct x86_function *func )
-{
- /* Restore GP registers in a reverse order.
- */
- x86_pop(
- func,
- x86_make_reg( file_REG32, reg_DX) );
- x86_pop(
- func,
- x86_make_reg( file_REG32, reg_CX) );
- x86_pop(
- func,
- x86_make_reg( file_REG32, reg_AX) );
-}
-
-static void
-emit_func_call_dst(
- struct x86_function *func,
- unsigned xmm_dst,
- void (PIPE_CDECL *code)() )
-{
- sse_movaps(
- func,
- get_temp( TEMP_R0, 0 ),
- make_xmm( xmm_dst ) );
-
- emit_push_gp(
- func );
-
- {
- struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-
- x86_lea(
- func,
- ecx,
- get_temp( TEMP_R0, 0 ) );
-
- x86_push( func, ecx );
- x86_mov_reg_imm( func, ecx, (unsigned long) code );
- x86_call( func, ecx );
- x86_pop(func, ecx );
- }
-
-
- x86_pop_gp(
- func );
-
- sse_movaps(
- func,
- make_xmm( xmm_dst ),
- get_temp( TEMP_R0, 0 ) );
-}
-
-static void
-emit_func_call_dst_src(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src,
- void (PIPE_CDECL *code)() )
-{
- sse_movaps(
- func,
- get_temp( TEMP_R0, 1 ),
- make_xmm( xmm_src ) );
-
- emit_func_call_dst(
- func,
- xmm_dst,
- code );
-}
-
-/**
- * Low-level instruction translators.
- */
-
-static void
-emit_abs(
- struct x86_function *func,
- unsigned xmm )
-{
- sse_andps(
- func,
- make_xmm( xmm ),
- get_temp(
- TGSI_EXEC_TEMP_7FFFFFFF_I,
- TGSI_EXEC_TEMP_7FFFFFFF_C ) );
-}
-
-static void
-emit_add(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- sse_addps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void PIPE_CDECL
-cos4f(
- float *store )
-{
- const unsigned X = 0;
-
- store[X + 0] = cosf( store[X + 0] );
- store[X + 1] = cosf( store[X + 1] );
- store[X + 2] = cosf( store[X + 2] );
- store[X + 3] = cosf( store[X + 3] );
-}
-
-static void
-emit_cos(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- cos4f );
-}
-
-static void PIPE_CDECL
-ex24f(
- float *store )
-{
- const unsigned X = 0;
-
- store[X + 0] = powf( 2.0f, store[X + 0] );
- store[X + 1] = powf( 2.0f, store[X + 1] );
- store[X + 2] = powf( 2.0f, store[X + 2] );
- store[X + 3] = powf( 2.0f, store[X + 3] );
-}
-
-static void
-emit_ex2(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- ex24f );
-}
-
-static void
-emit_f2it(
- struct x86_function *func,
- unsigned xmm )
-{
- sse2_cvttps2dq(
- func,
- make_xmm( xmm ),
- make_xmm( xmm ) );
-}
-
-static void PIPE_CDECL
-flr4f(
- float *store )
-{
- const unsigned X = 0;
-
- store[X + 0] = floorf( store[X + 0] );
- store[X + 1] = floorf( store[X + 1] );
- store[X + 2] = floorf( store[X + 2] );
- store[X + 3] = floorf( store[X + 3] );
-}
-
-static void
-emit_flr(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- flr4f );
-}
-
-static void PIPE_CDECL
-frc4f(
- float *store )
-{
- const unsigned X = 0;
-
- store[X + 0] -= floorf( store[X + 0] );
- store[X + 1] -= floorf( store[X + 1] );
- store[X + 2] -= floorf( store[X + 2] );
- store[X + 3] -= floorf( store[X + 3] );
-}
-
-static void
-emit_frc(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- frc4f );
-}
-
-static void PIPE_CDECL
-lg24f(
- float *store )
-{
- const unsigned X = 0;
-
- store[X + 0] = LOG2( store[X + 0] );
- store[X + 1] = LOG2( store[X + 1] );
- store[X + 2] = LOG2( store[X + 2] );
- store[X + 3] = LOG2( store[X + 3] );
-}
-
-static void
-emit_lg2(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- lg24f );
-}
-
-static void
-emit_MOV(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- sse_movups(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void
-emit_mul (struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src)
-{
- sse_mulps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void
-emit_neg(
- struct x86_function *func,
- unsigned xmm )
-{
- sse_xorps(
- func,
- make_xmm( xmm ),
- get_temp(
- TGSI_EXEC_TEMP_80000000_I,
- TGSI_EXEC_TEMP_80000000_C ) );
-}
-
-static void PIPE_CDECL
-pow4f(
- float *store )
-{
- const unsigned X = 0;
-
- store[X + 0] = powf( store[X + 0], store[X + 4] );
- store[X + 1] = powf( store[X + 1], store[X + 5] );
- store[X + 2] = powf( store[X + 2], store[X + 6] );
- store[X + 3] = powf( store[X + 3], store[X + 7] );
-}
-
-static void
-emit_pow(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- emit_func_call_dst_src(
- func,
- xmm_dst,
- xmm_src,
- pow4f );
-}
-
-static void
-emit_rcp (
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- /* On Intel CPUs at least, this is only accurate to 12 bits -- not
- * good enough. Need to either emit a proper divide or use the
- * iterative technique described below in emit_rsqrt().
- */
- sse2_rcpps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void
-emit_rsqrt(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
-#if HIGH_PRECISION
- /* Although rsqrtps() and rcpps() are low precision on some/all SSE
- * implementations, it is possible to improve its precision at
- * fairly low cost, using a newton/raphson step, as below:
- *
- * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
- * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
- *
- * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
- */
- {
- struct x86_reg dst = make_xmm( xmm_dst );
- struct x86_reg src = make_xmm( xmm_src );
- struct x86_reg tmp0 = make_xmm( 2 );
- struct x86_reg tmp1 = make_xmm( 3 );
-
- assert( xmm_dst != xmm_src );
- assert( xmm_dst != 2 && xmm_dst != 3 );
- assert( xmm_src != 2 && xmm_src != 3 );
-
- sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
- sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
- sse_rsqrtps( func, tmp1, src );
- sse_mulps( func, src, tmp1 );
- sse_mulps( func, dst, tmp1 );
- sse_mulps( func, src, tmp1 );
- sse_subps( func, tmp0, src );
- sse_mulps( func, dst, tmp0 );
- }
-#else
- /* On Intel CPUs at least, this is only accurate to 12 bits -- not
- * good enough.
- */
- sse_rsqrtps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-#endif
-}
-
-static void
-emit_setsign(
- struct x86_function *func,
- unsigned xmm )
-{
- sse_orps(
- func,
- make_xmm( xmm ),
- get_temp(
- TGSI_EXEC_TEMP_80000000_I,
- TGSI_EXEC_TEMP_80000000_C ) );
-}
-
-static void PIPE_CDECL
-sin4f(
- float *store )
-{
- const unsigned X = 0;
-
- store[X + 0] = sinf( store[X + 0] );
- store[X + 1] = sinf( store[X + 1] );
- store[X + 2] = sinf( store[X + 2] );
- store[X + 3] = sinf( store[X + 3] );
-}
-
-static void
-emit_sin (struct x86_function *func,
- unsigned xmm_dst)
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- sin4f );
-}
-
-static void
-emit_sub(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- sse_subps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-/**
- * Register fetch.
- */
-
-static void
-emit_fetch(
- struct x86_function *func,
- unsigned xmm,
- const struct tgsi_full_src_register *reg,
- const unsigned chan_index )
-{
- unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
-
- switch( swizzle ) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
- switch( reg->SrcRegister.File ) {
- case TGSI_FILE_CONSTANT:
- emit_const(
- func,
- xmm,
- reg->SrcRegister.Index,
- swizzle );
- break;
-
- case TGSI_FILE_IMMEDIATE:
- emit_immediate(
- func,
- xmm,
- reg->SrcRegister.Index,
- swizzle );
- break;
-
- case TGSI_FILE_INPUT:
- emit_inputf(
- func,
- xmm,
- reg->SrcRegister.Index,
- swizzle );
- break;
-
- case TGSI_FILE_TEMPORARY:
- emit_tempf(
- func,
- xmm,
- reg->SrcRegister.Index,
- swizzle );
- break;
-
- default:
- assert( 0 );
- }
- break;
-
- case TGSI_EXTSWIZZLE_ZERO:
- emit_tempf(
- func,
- xmm,
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C );
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- emit_tempf(
- func,
- xmm,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
- break;
-
- default:
- assert( 0 );
- }
-
- switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
- case TGSI_UTIL_SIGN_CLEAR:
- emit_abs( func, xmm );
- break;
-
- case TGSI_UTIL_SIGN_SET:
- emit_setsign( func, xmm );
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- emit_neg( func, xmm );
- break;
-
- case TGSI_UTIL_SIGN_KEEP:
- break;
- }
-}
-
-#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
- emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
-
-/**
- * Register store.
- */
-
-static void
-emit_store(
- struct x86_function *func,
- unsigned xmm,
- const struct tgsi_full_dst_register *reg,
- const struct tgsi_full_instruction *inst,
- unsigned chan_index )
-{
- switch( reg->DstRegister.File ) {
- case TGSI_FILE_OUTPUT:
- emit_output(
- func,
- xmm,
- reg->DstRegister.Index,
- chan_index );
- break;
-
- case TGSI_FILE_TEMPORARY:
- emit_temps(
- func,
- xmm,
- reg->DstRegister.Index,
- chan_index );
- break;
-
- case TGSI_FILE_ADDRESS:
- emit_addrs(
- func,
- xmm,
- reg->DstRegister.Index,
- chan_index );
- break;
-
- default:
- assert( 0 );
- }
-
- switch( inst->Instruction.Saturate ) {
- case TGSI_SAT_NONE:
- break;
-
- case TGSI_SAT_ZERO_ONE:
- /* assert( 0 ); */
- break;
-
- case TGSI_SAT_MINUS_PLUS_ONE:
- assert( 0 );
- break;
- }
-}
-
-#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
- emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
-
-/**
- * High-level instruction translators.
- */
-
-static void
-emit_kil(
- struct x86_function *func,
- const struct tgsi_full_src_register *reg )
-{
- unsigned uniquemask;
- unsigned registers[4];
- unsigned nextregister = 0;
- unsigned firstchan = ~0;
- unsigned chan_index;
-
- /* This mask stores component bits that were already tested. Note that
- * we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
- uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
-
- FOR_EACH_CHANNEL( chan_index ) {
- unsigned swizzle;
-
- /* unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle(
- reg,
- chan_index );
-
- /* check if the component has not been already tested */
- if( !(uniquemask & (1 << swizzle)) ) {
- uniquemask |= 1 << swizzle;
-
- /* allocate register */
- registers[chan_index] = nextregister;
- emit_fetch(
- func,
- nextregister,
- reg,
- chan_index );
- nextregister++;
-
- /* mark the first channel used */
- if( firstchan == ~0 ) {
- firstchan = chan_index;
- }
- }
- }
-
- x86_push(
- func,
- x86_make_reg( file_REG32, reg_AX ) );
- x86_push(
- func,
- x86_make_reg( file_REG32, reg_DX ) );
-
- FOR_EACH_CHANNEL( chan_index ) {
- if( uniquemask & (1 << chan_index) ) {
- sse_cmpps(
- func,
- make_xmm( registers[chan_index] ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ),
- cc_LessThan );
-
- if( chan_index == firstchan ) {
- sse_pmovmskb(
- func,
- x86_make_reg( file_REG32, reg_AX ),
- make_xmm( registers[chan_index] ) );
- }
- else {
- sse_pmovmskb(
- func,
- x86_make_reg( file_REG32, reg_DX ),
- make_xmm( registers[chan_index] ) );
- x86_or(
- func,
- x86_make_reg( file_REG32, reg_AX ),
- x86_make_reg( file_REG32, reg_DX ) );
- }
- }
- }
-
- x86_or(
- func,
- get_temp(
- TGSI_EXEC_TEMP_KILMASK_I,
- TGSI_EXEC_TEMP_KILMASK_C ),
- x86_make_reg( file_REG32, reg_AX ) );
-
- x86_pop(
- func,
- x86_make_reg( file_REG32, reg_DX ) );
- x86_pop(
- func,
- x86_make_reg( file_REG32, reg_AX ) );
-}
-
-static void
-emit_setcc(
- struct x86_function *func,
- struct tgsi_full_instruction *inst,
- enum sse_cc cc )
-{
- unsigned chan_index;
-
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- sse_cmpps(
- func,
- make_xmm( 0 ),
- make_xmm( 1 ),
- cc );
- sse_andps(
- func,
- make_xmm( 0 ),
- get_temp(
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C ) );
- STORE( func, *inst, 0, 0, chan_index );
- }
-}
-
-static void
-emit_cmp(
- struct x86_function *func,
- struct tgsi_full_instruction *inst )
-{
- unsigned chan_index;
-
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- FETCH( func, *inst, 2, 2, chan_index );
- sse_cmpps(
- func,
- make_xmm( 0 ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ),
- cc_LessThan );
- sse_andps(
- func,
- make_xmm( 1 ),
- make_xmm( 0 ) );
- sse_andnps(
- func,
- make_xmm( 0 ),
- make_xmm( 2 ) );
- sse_orps(
- func,
- make_xmm( 0 ),
- make_xmm( 1 ) );
- STORE( func, *inst, 0, 0, chan_index );
- }
-}
-
-static int
-emit_instruction(
- struct x86_function *func,
- struct tgsi_full_instruction *inst )
-{
- unsigned chan_index;
-
- switch( inst->Instruction.Opcode ) {
- case TGSI_OPCODE_ARL:
-#if 0
- /* XXX this isn't working properly (see glean vertProg1 test) */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- emit_f2it( func, 0 );
- STORE( func, *inst, 0, 0, chan_index );
- }
-#else
- return 0;
-#endif
- break;
-
- case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LIT:
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- emit_tempf(
- func,
- 0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C);
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
- STORE( func, *inst, 0, 0, CHAN_X );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- STORE( func, *inst, 0, 0, CHAN_W );
- }
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( func, *inst, 0, 0, CHAN_X );
- sse_maxps(
- func,
- make_xmm( 0 ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ) );
- STORE( func, *inst, 0, 0, CHAN_Y );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- /* XMM[1] = SrcReg[0].yyyy */
- FETCH( func, *inst, 1, 0, CHAN_Y );
- /* XMM[1] = max(XMM[1], 0) */
- sse_maxps(
- func,
- make_xmm( 1 ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ) );
- /* XMM[2] = SrcReg[0].wwww */
- FETCH( func, *inst, 2, 0, CHAN_W );
- /* XMM[2] = min(XMM[2], 128.0) */
- sse_minps(
- func,
- make_xmm( 2 ),
- get_temp(
- TGSI_EXEC_TEMP_128_I,
- TGSI_EXEC_TEMP_128_C ) );
- /* XMM[2] = max(XMM[2], -128.0) */
- sse_maxps(
- func,
- make_xmm( 2 ),
- get_temp(
- TGSI_EXEC_TEMP_MINUS_128_I,
- TGSI_EXEC_TEMP_MINUS_128_C ) );
- emit_pow( func, 1, 2 );
- FETCH( func, *inst, 0, 0, CHAN_X );
- sse_xorps(
- func,
- make_xmm( 2 ),
- make_xmm( 2 ) );
- sse_cmpps(
- func,
- make_xmm( 2 ),
- make_xmm( 0 ),
- cc_LessThanEqual );
- sse_andps(
- func,
- make_xmm( 2 ),
- make_xmm( 1 ) );
- STORE( func, *inst, 2, 0, CHAN_Z );
- }
- }
- break;
-
- case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_rcp( func, 0, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_rsqrt( func, 1, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 1, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_EXP:
- return 0;
- break;
-
- case TGSI_OPCODE_LOG:
- return 0;
- break;
-
- case TGSI_OPCODE_MUL:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_mul( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_ADD:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_add( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP3:
- /* TGSI_OPCODE_DOT3 */
- FETCH( func, *inst, 0, 0, CHAN_X );
- FETCH( func, *inst, 1, 1, CHAN_X );
- emit_mul( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Y );
- FETCH( func, *inst, 2, 1, CHAN_Y );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Z );
- FETCH( func, *inst, 2, 1, CHAN_Z );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP4:
- /* TGSI_OPCODE_DOT4 */
- FETCH( func, *inst, 0, 0, CHAN_X );
- FETCH( func, *inst, 1, 1, CHAN_X );
- emit_mul( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Y );
- FETCH( func, *inst, 2, 1, CHAN_Y );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Z );
- FETCH( func, *inst, 2, 1, CHAN_Z );
- emit_mul(func, 1, 2 );
- emit_add(func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_W );
- FETCH( func, *inst, 2, 1, CHAN_W );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DST:
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- emit_tempf(
- func,
- 0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
- STORE( func, *inst, 0, 0, CHAN_X );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- FETCH( func, *inst, 0, 0, CHAN_Y );
- FETCH( func, *inst, 1, 1, CHAN_Y );
- emit_mul( func, 0, 1 );
- STORE( func, *inst, 0, 0, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- FETCH( func, *inst, 0, 0, CHAN_Z );
- STORE( func, *inst, 0, 0, CHAN_Z );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
- FETCH( func, *inst, 0, 1, CHAN_W );
- STORE( func, *inst, 0, 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MIN:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- sse_minps(
- func,
- make_xmm( 0 ),
- make_xmm( 1 ) );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MAX:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- sse_maxps(
- func,
- make_xmm( 0 ),
- make_xmm( 1 ) );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
- emit_setcc( func, inst, cc_LessThan );
- break;
-
- case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
- emit_setcc( func, inst, cc_NotLessThan );
- break;
-
- case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- FETCH( func, *inst, 2, 2, chan_index );
- emit_mul( func, 0, 1 );
- emit_add( func, 0, 2 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SUB:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_sub( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- FETCH( func, *inst, 2, 2, chan_index );
- emit_sub( func, 1, 2 );
- emit_mul( func, 0, 1 );
- emit_add( func, 0, 2 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CND:
- return 0;
- break;
-
- case TGSI_OPCODE_CND0:
- return 0;
- break;
-
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
- return 0;
- break;
-
- case TGSI_OPCODE_INDEX:
- return 0;
- break;
-
- case TGSI_OPCODE_NEGATE:
- return 0;
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- emit_frc( func, 0 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CLAMP:
- return 0;
- break;
-
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- emit_flr( func, 0 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_ROUND:
- return 0;
- break;
-
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_ex2( func, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_lg2( func, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
- FETCH( func, *inst, 0, 0, CHAN_X );
- FETCH( func, *inst, 1, 1, CHAN_X );
- emit_pow( func, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( func, *inst, 1, 1, CHAN_Z );
- FETCH( func, *inst, 3, 0, CHAN_Z );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- FETCH( func, *inst, 0, 0, CHAN_Y );
- FETCH( func, *inst, 4, 1, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- emit_MOV( func, 2, 0 );
- emit_mul( func, 2, 1 );
- emit_MOV( func, 5, 3 );
- emit_mul( func, 5, 4 );
- emit_sub( func, 2, 5 );
- STORE( func, *inst, 2, 0, CHAN_X );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- FETCH( func, *inst, 2, 1, CHAN_X );
- FETCH( func, *inst, 5, 0, CHAN_X );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- emit_mul( func, 3, 2 );
- emit_mul( func, 1, 5 );
- emit_sub( func, 3, 1 );
- STORE( func, *inst, 3, 0, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- emit_mul( func, 5, 4 );
- emit_mul( func, 0, 2 );
- emit_sub( func, 5, 0 );
- STORE( func, *inst, 5, 0, CHAN_Z );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
- emit_tempf(
- func,
- 0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
- STORE( func, *inst, 0, 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MULTIPLYMATRIX:
- return 0;
- break;
-
- case TGSI_OPCODE_ABS:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- emit_abs( func, 0) ;
-
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_RCC:
- return 0;
- break;
-
- case TGSI_OPCODE_DPH:
- FETCH( func, *inst, 0, 0, CHAN_X );
- FETCH( func, *inst, 1, 1, CHAN_X );
- emit_mul( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Y );
- FETCH( func, *inst, 2, 1, CHAN_Y );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Z );
- FETCH( func, *inst, 2, 1, CHAN_Z );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FETCH( func, *inst, 1, 1, CHAN_W );
- emit_add( func, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_COS:
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_cos( func, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DDX:
- return 0;
- break;
-
- case TGSI_OPCODE_DDY:
- return 0;
- break;
-
- case TGSI_OPCODE_KIL:
- emit_kil( func, &inst->FullSrcRegisters[0] );
- break;
-
- case TGSI_OPCODE_PK2H:
- return 0;
- break;
-
- case TGSI_OPCODE_PK2US:
- return 0;
- break;
-
- case TGSI_OPCODE_PK4B:
- return 0;
- break;
-
- case TGSI_OPCODE_PK4UB:
- return 0;
- break;
-
- case TGSI_OPCODE_RFL:
- return 0;
- break;
-
- case TGSI_OPCODE_SEQ:
- return 0;
- break;
-
- case TGSI_OPCODE_SFL:
- return 0;
- break;
-
- case TGSI_OPCODE_SGT:
- return 0;
- break;
-
- case TGSI_OPCODE_SIN:
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_sin( func, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLE:
- return 0;
- break;
-
- case TGSI_OPCODE_SNE:
- return 0;
- break;
-
- case TGSI_OPCODE_STR:
- return 0;
- break;
-
- case TGSI_OPCODE_TEX:
- if (0) {
- /* Disable dummy texture code:
- */
- emit_tempf(
- func,
- 0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- }
- else {
- return 0;
- }
- break;
-
- case TGSI_OPCODE_TXD:
- return 0;
- break;
-
- case TGSI_OPCODE_UP2H:
- return 0;
- break;
-
- case TGSI_OPCODE_UP2US:
- return 0;
- break;
-
- case TGSI_OPCODE_UP4B:
- return 0;
- break;
-
- case TGSI_OPCODE_UP4UB:
- return 0;
- break;
-
- case TGSI_OPCODE_X2D:
- return 0;
- break;
-
- case TGSI_OPCODE_ARA:
- return 0;
- break;
-
- case TGSI_OPCODE_ARR:
- return 0;
- break;
-
- case TGSI_OPCODE_BRA:
- return 0;
- break;
-
- case TGSI_OPCODE_CAL:
- return 0;
- break;
-
- case TGSI_OPCODE_RET:
- emit_ret( func );
- break;
-
- case TGSI_OPCODE_END:
- break;
-
- case TGSI_OPCODE_SSG:
- return 0;
- break;
-
- case TGSI_OPCODE_CMP:
- emit_cmp (func, inst);
- break;
-
- case TGSI_OPCODE_SCS:
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_cos( func, 0 );
- STORE( func, *inst, 0, 0, CHAN_X );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_sin( func, 0 );
- STORE( func, *inst, 0, 0, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- emit_tempf(
- func,
- 0,
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C );
- STORE( func, *inst, 0, 0, CHAN_Z );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
- emit_tempf(
- func,
- 0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
- STORE( func, *inst, 0, 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_TXB:
- return 0;
- break;
-
- case TGSI_OPCODE_NRM:
- return 0;
- break;
-
- case TGSI_OPCODE_DIV:
- return 0;
- break;
-
- case TGSI_OPCODE_DP2:
- return 0;
- break;
-
- case TGSI_OPCODE_TXL:
- return 0;
- break;
-
- case TGSI_OPCODE_BRK:
- return 0;
- break;
-
- case TGSI_OPCODE_IF:
- return 0;
- break;
-
- case TGSI_OPCODE_LOOP:
- return 0;
- break;
-
- case TGSI_OPCODE_REP:
- return 0;
- break;
-
- case TGSI_OPCODE_ELSE:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDIF:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDLOOP:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDREP:
- return 0;
- break;
-
- case TGSI_OPCODE_PUSHA:
- return 0;
- break;
-
- case TGSI_OPCODE_POPA:
- return 0;
- break;
-
- case TGSI_OPCODE_CEIL:
- return 0;
- break;
-
- case TGSI_OPCODE_I2F:
- return 0;
- break;
-
- case TGSI_OPCODE_NOT:
- return 0;
- break;
-
- case TGSI_OPCODE_TRUNC:
- return 0;
- break;
-
- case TGSI_OPCODE_SHL:
- return 0;
- break;
-
- case TGSI_OPCODE_SHR:
- return 0;
- break;
-
- case TGSI_OPCODE_AND:
- return 0;
- break;
-
- case TGSI_OPCODE_OR:
- return 0;
- break;
-
- case TGSI_OPCODE_MOD:
- return 0;
- break;
-
- case TGSI_OPCODE_XOR:
- return 0;
- break;
-
- case TGSI_OPCODE_SAD:
- return 0;
- break;
-
- case TGSI_OPCODE_TXF:
- return 0;
- break;
-
- case TGSI_OPCODE_TXQ:
- return 0;
- break;
-
- case TGSI_OPCODE_CONT:
- return 0;
- break;
-
- case TGSI_OPCODE_EMIT:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDPRIM:
- return 0;
- break;
-
- default:
- return 0;
- }
-
- return 1;
-}
-
-static void
-emit_declaration(
- struct x86_function *func,
- struct tgsi_full_declaration *decl )
-{
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- unsigned i, j;
-
- first = decl->DeclarationRange.First;
- last = decl->DeclarationRange.Last;
- mask = decl->Declaration.UsageMask;
-
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- switch( decl->Declaration.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- emit_coef_a0( func, 0, i, j );
- emit_inputs( func, 0, i, j );
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
- emit_coef_dadx( func, 1, i, j );
- emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
- emit_coef_dady( func, 3, i, j );
- emit_mul( func, 0, 1 ); /* x * dadx */
- emit_coef_a0( func, 4, i, j );
- emit_mul( func, 2, 3 ); /* y * dady */
- emit_add( func, 0, 4 ); /* x * dadx + a0 */
- emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
- emit_inputs( func, 0, i, j );
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
- emit_coef_dadx( func, 1, i, j );
- emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
- emit_coef_dady( func, 3, i, j );
- emit_mul( func, 0, 1 ); /* x * dadx */
- emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
- emit_coef_a0( func, 5, i, j );
- emit_rcp( func, 4, 4 ); /* 1.0 / w */
- emit_mul( func, 2, 3 ); /* y * dady */
- emit_add( func, 0, 5 ); /* x * dadx + a0 */
- emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
- emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
- emit_inputs( func, 0, i, j );
- break;
-
- default:
- assert( 0 );
- break;
- }
- }
- }
- }
- }
-}
-
-static void aos_to_soa( struct x86_function *func,
- uint arg_aos,
- uint arg_soa,
- uint arg_num,
- uint arg_stride )
-{
- struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
- struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
- struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
- struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
- int inner_loop;
-
-
- /* Save EBX */
- x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
-
- x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) );
- x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) );
- x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
- x86_mov( func, stride, x86_fn_arg( func, arg_stride ) );
-
- /* do */
- inner_loop = x86_get_label( func );
- {
- x86_push( func, aos_input );
- sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
- sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
- x86_add( func, aos_input, stride );
- sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
- sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
- x86_add( func, aos_input, stride );
- sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
- sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
- x86_add( func, aos_input, stride );
- sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
- sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
- x86_pop( func, aos_input );
-
- sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
- sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
- sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
- sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
- sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
- sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
-
- sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
- sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
- sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
- sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
-
- /* Advance to next input */
- x86_lea( func, aos_input, x86_make_disp(aos_input, 16) );
- x86_lea( func, soa_input, x86_make_disp(soa_input, 64) );
- }
- /* while --num_inputs */
- x86_dec( func, num_inputs );
- x86_jcc( func, cc_NE, inner_loop );
-
- /* Restore EBX */
- x86_pop( func, aos_input );
-}
-
-static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
-{
- struct x86_reg soa_output;
- struct x86_reg aos_output;
- struct x86_reg num_outputs;
- struct x86_reg temp;
- int inner_loop;
-
- soa_output = x86_make_reg( file_REG32, reg_AX );
- aos_output = x86_make_reg( file_REG32, reg_BX );
- num_outputs = x86_make_reg( file_REG32, reg_CX );
- temp = x86_make_reg( file_REG32, reg_DX );
-
- /* Save EBX */
- x86_push( func, aos_output );
-
- x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
- x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
- x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
-
- /* do */
- inner_loop = x86_get_label( func );
- {
- sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
- sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
- sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
- sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
-
- sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
- sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
- sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
- sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
- sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
- sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
-
- x86_mov( func, temp, x86_fn_arg( func, stride ) );
- x86_push( func, aos_output );
- sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
- sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
- x86_add( func, aos_output, temp );
- sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
- sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
- x86_add( func, aos_output, temp );
- sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
- sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
- x86_add( func, aos_output, temp );
- sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
- sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
- x86_pop( func, aos_output );
-
- /* Advance to next output */
- x86_lea( func, aos_output, x86_make_disp(aos_output, 16) );
- x86_lea( func, soa_output, x86_make_disp(soa_output, 64) );
- }
- /* while --num_outputs */
- x86_dec( func, num_outputs );
- x86_jcc( func, cc_NE, inner_loop );
-
- /* Restore EBX */
- x86_pop( func, aos_output );
-}
-
-/**
- * Translate a TGSI vertex/fragment shader to SSE2 code.
- * Slightly different things are done for vertex vs. fragment shaders.
- *
- * Note that fragment shaders are responsible for interpolating shader
- * inputs. Because on x86 we have only 4 GP registers, and here we
- * have 5 shader arguments (input, output, const, temp and coef), the
- * code is split into two phases -- DECLARATION and INSTRUCTION phase.
- * GP register holding the output argument is aliased with the coeff
- * argument, as outputs are not needed in the DECLARATION phase.
- *
- * \param tokens the TGSI input shader
- * \param func the output SSE code/function
- * \param immediates buffer to place immediates, later passed to SSE func
- * \param return 1 for success, 0 if translation failed
- */
-unsigned
-tgsi_emit_sse2(
- const struct tgsi_token *tokens,
- struct x86_function *func,
- float (*immediates)[4],
- boolean do_swizzles )
-{
- struct tgsi_parse_context parse;
- boolean instruction_phase = FALSE;
- unsigned ok = 1;
- uint num_immediates = 0;
-
- func->csr = func->store;
-
- tgsi_parse_init( &parse, tokens );
-
- /* Can't just use EDI, EBX without save/restoring them:
- */
- x86_push(
- func,
- get_immediate_base() );
-
- x86_push(
- func,
- get_temp_base() );
-
-
- /*
- * Different function args for vertex/fragment shaders:
- */
- if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
- /* DECLARATION phase, do not load output argument. */
- x86_mov(
- func,
- get_input_base(),
- x86_fn_arg( func, 1 ) );
- /* skipping outputs argument here */
- x86_mov(
- func,
- get_const_base(),
- x86_fn_arg( func, 3 ) );
- x86_mov(
- func,
- get_temp_base(),
- x86_fn_arg( func, 4 ) );
- x86_mov(
- func,
- get_coef_base(),
- x86_fn_arg( func, 5 ) );
- x86_mov(
- func,
- get_immediate_base(),
- x86_fn_arg( func, 6 ) );
- }
- else {
- assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
-
- if (do_swizzles)
- aos_to_soa( func,
- 6, /* aos_input */
- 1, /* machine->input */
- 7, /* num_inputs */
- 8 ); /* input_stride */
-
- x86_mov(
- func,
- get_input_base(),
- x86_fn_arg( func, 1 ) );
- x86_mov(
- func,
- get_output_base(),
- x86_fn_arg( func, 2 ) );
- x86_mov(
- func,
- get_const_base(),
- x86_fn_arg( func, 3 ) );
- x86_mov(
- func,
- get_temp_base(),
- x86_fn_arg( func, 4 ) );
- x86_mov(
- func,
- get_immediate_base(),
- x86_fn_arg( func, 5 ) );
- }
-
- while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
- tgsi_parse_token( &parse );
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
- emit_declaration(
- func,
- &parse.FullToken.FullDeclaration );
- }
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
- if( !instruction_phase ) {
- /* INSTRUCTION phase, overwrite coeff with output. */
- instruction_phase = TRUE;
- x86_mov(
- func,
- get_output_base(),
- x86_fn_arg( func, 2 ) );
- }
- }
-
- ok = emit_instruction(
- func,
- &parse.FullToken.FullInstruction );
-
- if (!ok) {
- debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n",
- parse.FullToken.FullInstruction.Instruction.Opcode,
- parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
- "vertex shader" : "fragment shader");
- }
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* simply copy the immediate values into the next immediates[] slot */
- {
- const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
- uint i;
- assert(size <= 4);
- assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
- for( i = 0; i < size; i++ ) {
- immediates[num_immediates][i] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
- }
-#if 0
- debug_printf("SSE FS immediate[%d] = %f %f %f %f\n",
- num_immediates,
- immediates[num_immediates][0],
- immediates[num_immediates][1],
- immediates[num_immediates][2],
- immediates[num_immediates][3]);
-#endif
- num_immediates++;
- }
- break;
-
- default:
- ok = 0;
- assert( 0 );
- }
- }
-
- if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
- if (do_swizzles)
- soa_to_aos( func, 9, 2, 10, 11 );
- }
-
- /* Can't just use EBX, EDI without save/restoring them:
- */
- x86_pop(
- func,
- get_temp_base() );
-
- x86_pop(
- func,
- get_immediate_base() );
-
- emit_ret( func );
-
- tgsi_parse_free( &parse );
-
- return ok;
-}
-
-#endif /* PIPE_ARCH_X86 */
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
deleted file mode 100755
index af838b2a25..0000000000
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef TGSI_SSE2_H
-#define TGSI_SSE2_H
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-struct tgsi_token;
-struct x86_function;
-
-unsigned
-tgsi_emit_sse2(
- const struct tgsi_token *tokens,
- struct x86_function *function,
- float (*immediates)[4],
- boolean do_swizzles );
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_SSE2_H */