summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
authorMichal Krol <michal@vmware.com>2010-01-05 11:04:50 +0100
committerMichal Krol <michal@vmware.com>2010-01-05 11:04:50 +0100
commit9b21b3c52a8a7d58d08151d1a6bf25c472dec213 (patch)
treed9083b6af4e2e9b70a7fa6cd31bac45a36e0f6b6 /src/gallium/auxiliary/tgsi
parent543b9566bdaa48fea2df1866fa1310c1cdbcde27 (diff)
parent1f9aa38f4e2be47229d92be2c1189c2b8d9c7133 (diff)
Merge branch 'master' into instanced-arrays
Conflicts: src/gallium/auxiliary/tgsi/tgsi_dump.c src/gallium/include/pipe/p_shader_tokens.h
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r--src/gallium/auxiliary/tgsi/Makefile22
-rw-r--r--src/gallium/auxiliary/tgsi/SConscript23
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c91
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c1879
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h47
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c30
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h23
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c22
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c287
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c380
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c152
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h94
15 files changed, 1929 insertions, 1129 deletions
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
deleted file mode 100644
index 5f0a580b09..0000000000
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = tgsi
-
-C_SOURCES = \
- tgsi_sanity.c \
- tgsi_build.c \
- tgsi_dump.c \
- tgsi_exec.c \
- tgsi_info.c \
- tgsi_iterate.c \
- tgsi_parse.c \
- tgsi_ppc.c \
- tgsi_scan.c \
- tgsi_sse2.c \
- tgsi_text.c \
- tgsi_transform.c \
- tgsi_ureg.c \
- tgsi_util.c
-
-include ../../Makefile.template
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
deleted file mode 100644
index b6bc2924f0..0000000000
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ /dev/null
@@ -1,23 +0,0 @@
-Import('*')
-
-tgsi = env.ConvenienceLibrary(
- target = 'tgsi',
- source = [
- 'tgsi_build.c',
- 'tgsi_dump.c',
- 'tgsi_dump_c.c',
- 'tgsi_exec.c',
- 'tgsi_info.c',
- 'tgsi_iterate.c',
- 'tgsi_parse.c',
- 'tgsi_sanity.c',
- 'tgsi_scan.c',
- 'tgsi_ppc.c',
- 'tgsi_sse2.c',
- 'tgsi_text.c',
- 'tgsi_transform.c',
- 'tgsi_ureg.c',
- 'tgsi_util.c',
- ])
-
-auxiliaries.insert(0, tgsi)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 92903fe57f..de9cbc8630 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -1025,7 +1025,7 @@ tgsi_build_full_property(
size++;
*property = tgsi_build_property(
- TGSI_PROPERTY_GS_INPUT_PRIM,
+ full_prop->Property.PropertyName,
header );
assert( full_prop->Property.NrTokens <= 8 + 1 );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 4391ca75d1..d7ff262f30 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -123,12 +123,15 @@ static const char *semantic_names[] =
"NORMAL",
"FACE",
"EDGEFLAG",
+ "PRIM_ID",
"INSTANCEID"
};
static const char *immediate_type_names[] =
{
- "FLT32"
+ "FLT32",
+ "UINT32",
+ "INT32"
};
static const char *swizzle_names[] =
@@ -175,13 +178,19 @@ static const char *primitive_names[] =
static void
-_dump_register(
+_dump_register_decl(
struct dump_ctx *ctx,
uint file,
int first,
int last )
{
ENM( file, file_names );
+
+ /* all geometry shader inputs are two dimensional */
+ if (file == TGSI_FILE_INPUT &&
+ ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY)
+ TXT("[]");
+
CHR( '[' );
SID( first );
if (first != last) {
@@ -192,6 +201,52 @@ _dump_register(
}
static void
+_dump_register_dst(
+ struct dump_ctx *ctx,
+ uint file,
+ int index)
+{
+ ENM( file, file_names );
+
+ CHR( '[' );
+ SID( index );
+ CHR( ']' );
+}
+
+
+static void
+_dump_register_src(
+ struct dump_ctx *ctx,
+ const struct tgsi_full_src_register *src )
+{
+ if (src->Register.Indirect) {
+ ENM( src->Register.File, file_names );
+ CHR( '[' );
+ ENM( src->Indirect.File, file_names );
+ CHR( '[' );
+ SID( src->Indirect.Index );
+ TXT( "]." );
+ ENM( src->Indirect.SwizzleX, swizzle_names );
+ if (src->Register.Index != 0) {
+ if (src->Register.Index > 0)
+ CHR( '+' );
+ SID( src->Register.Index );
+ }
+ CHR( ']' );
+ } else {
+ ENM( src->Register.File, file_names );
+ CHR( '[' );
+ SID( src->Register.Index );
+ CHR( ']' );
+ }
+ if (src->Register.Dimension) {
+ CHR( '[' );
+ SID( src->Dimension.Index );
+ CHR( ']' );
+ }
+}
+
+static void
_dump_register_ind(
struct dump_ctx *ctx,
uint file,
@@ -245,7 +300,7 @@ iter_declaration(
TXT( "DCL " );
- _dump_register(
+ _dump_register_decl(
ctx,
decl->Declaration.File,
decl->Range.First,
@@ -360,6 +415,12 @@ iter_immediate(
case TGSI_IMM_FLOAT32:
FLT( imm->u[i].Float );
break;
+ case TGSI_IMM_UINT32:
+ UID(imm->u[i].Uint);
+ break;
+ case TGSI_IMM_INT32:
+ SID(imm->u[i].Int);
+ break;
default:
assert( 0 );
}
@@ -436,10 +497,9 @@ iter_instruction(
dst->Indirect.SwizzleX );
}
else {
- _dump_register(
+ _dump_register_dst(
ctx,
dst->Register.File,
- dst->Register.Index,
dst->Register.Index );
}
_dump_writemask( ctx, dst->Register.WriteMask );
@@ -455,26 +515,11 @@ iter_instruction(
CHR( ' ' );
if (src->Register.Negate)
- TXT( "-(" );
+ CHR( '-' );
if (src->Register.Absolute)
CHR( '|' );
- if (src->Register.Indirect) {
- _dump_register_ind(
- ctx,
- src->Register.File,
- src->Register.Index,
- src->Indirect.File,
- src->Indirect.Index,
- src->Indirect.SwizzleX );
- }
- else {
- _dump_register(
- ctx,
- src->Register.File,
- src->Register.Index,
- src->Register.Index );
- }
+ _dump_register_src(ctx, src);
if (src->Register.SwizzleX != TGSI_SWIZZLE_X ||
src->Register.SwizzleY != TGSI_SWIZZLE_Y ||
@@ -489,8 +534,6 @@ iter_instruction(
if (src->Register.Absolute)
CHR( '|' );
- if (src->Register.Negate)
- CHR( ')' );
first_reg = FALSE;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 123117cb0a..f43233bdb4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2,6 +2,7 @@
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2009-2010 VMware, Inc. All rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -60,6 +61,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
+
#define FAST_MATH 1
#define TILE_TOP_LEFT 0
@@ -67,11 +69,329 @@
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
+static void
+micro_abs(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = fabsf(src->f[0]);
+ dst->f[1] = fabsf(src->f[1]);
+ dst->f[2] = fabsf(src->f[2]);
+ dst->f[3] = fabsf(src->f[3]);
+}
+
+static void
+micro_arl(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = (int)floorf(src->f[0]);
+ dst->i[1] = (int)floorf(src->f[1]);
+ dst->i[2] = (int)floorf(src->f[2]);
+ dst->i[3] = (int)floorf(src->f[3]);
+}
+
+static void
+micro_arr(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = (int)floorf(src->f[0] + 0.5f);
+ dst->i[1] = (int)floorf(src->f[1] + 0.5f);
+ dst->i[2] = (int)floorf(src->f[2] + 0.5f);
+ dst->i[3] = (int)floorf(src->f[3] + 0.5f);
+}
+
+static void
+micro_ceil(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = ceilf(src->f[0]);
+ dst->f[1] = ceilf(src->f[1]);
+ dst->f[2] = ceilf(src->f[2]);
+ dst->f[3] = ceilf(src->f[3]);
+}
+
+static void
+micro_cos(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = cosf(src->f[0]);
+ dst->f[1] = cosf(src->f[1]);
+ dst->f[2] = cosf(src->f[2]);
+ dst->f[3] = cosf(src->f[3]);
+}
+
+static void
+micro_ddx(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] =
+ dst->f[1] =
+ dst->f[2] =
+ dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
+}
+
+static void
+micro_ddy(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] =
+ dst->f[1] =
+ dst->f[2] =
+ dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
+}
+
+static void
+micro_exp2(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+#if FAST_MATH
+ dst->f[0] = util_fast_exp2(src->f[0]);
+ dst->f[1] = util_fast_exp2(src->f[1]);
+ dst->f[2] = util_fast_exp2(src->f[2]);
+ dst->f[3] = util_fast_exp2(src->f[3]);
+#else
+#if DEBUG
+ /* Inf is okay for this instruction, so clamp it to silence assertions. */
+ uint i;
+ union tgsi_exec_channel clamped;
+
+ for (i = 0; i < 4; i++) {
+ if (src->f[i] > 127.99999f) {
+ clamped.f[i] = 127.99999f;
+ } else if (src->f[i] < -126.99999f) {
+ clamped.f[i] = -126.99999f;
+ } else {
+ clamped.f[i] = src->f[i];
+ }
+ }
+ src = &clamped;
+#endif /* DEBUG */
+
+ dst->f[0] = powf(2.0f, src->f[0]);
+ dst->f[1] = powf(2.0f, src->f[1]);
+ dst->f[2] = powf(2.0f, src->f[2]);
+ dst->f[3] = powf(2.0f, src->f[3]);
+#endif /* FAST_MATH */
+}
+
+static void
+micro_flr(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = floorf(src->f[0]);
+ dst->f[1] = floorf(src->f[1]);
+ dst->f[2] = floorf(src->f[2]);
+ dst->f[3] = floorf(src->f[3]);
+}
+
+static void
+micro_frc(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src->f[0] - floorf(src->f[0]);
+ dst->f[1] = src->f[1] - floorf(src->f[1]);
+ dst->f[2] = src->f[2] - floorf(src->f[2]);
+ dst->f[3] = src->f[3] - floorf(src->f[3]);
+}
+
+static void
+micro_iabs(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
+ dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
+ dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
+ dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
+}
+
+static void
+micro_ineg(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = -src->i[0];
+ dst->i[1] = -src->i[1];
+ dst->i[2] = -src->i[2];
+ dst->i[3] = -src->i[3];
+}
+
+static void
+micro_lg2(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+#if FAST_MATH
+ dst->f[0] = util_fast_log2(src->f[0]);
+ dst->f[1] = util_fast_log2(src->f[1]);
+ dst->f[2] = util_fast_log2(src->f[2]);
+ dst->f[3] = util_fast_log2(src->f[3]);
+#else
+ dst->f[0] = logf(src->f[0]) * 1.442695f;
+ dst->f[1] = logf(src->f[1]) * 1.442695f;
+ dst->f[2] = logf(src->f[2]) * 1.442695f;
+ dst->f[3] = logf(src->f[3]) * 1.442695f;
+#endif
+}
+
+static void
+micro_lrp(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0];
+ dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1];
+ dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2];
+ dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3];
+}
+
+static void
+micro_mad(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0];
+ dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1];
+ dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2];
+ dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3];
+}
+
+static void
+micro_mov(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src->u[0];
+ dst->u[1] = src->u[1];
+ dst->u[2] = src->u[2];
+ dst->u[3] = src->u[3];
+}
+
+static void
+micro_rcp(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = 1.0f / src->f[0];
+ dst->f[1] = 1.0f / src->f[1];
+ dst->f[2] = 1.0f / src->f[2];
+ dst->f[3] = 1.0f / src->f[3];
+}
+
+static void
+micro_rnd(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = floorf(src->f[0] + 0.5f);
+ dst->f[1] = floorf(src->f[1] + 0.5f);
+ dst->f[2] = floorf(src->f[2] + 0.5f);
+ dst->f[3] = floorf(src->f[3] + 0.5f);
+}
+
+static void
+micro_rsq(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
+ dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
+ dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
+ dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
+}
+
+static void
+micro_seq(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_sge(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_sgn(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
+ dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
+ dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
+ dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
+}
+
+static void
+micro_sgt(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_sin(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = sinf(src->f[0]);
+ dst->f[1] = sinf(src->f[1]);
+ dst->f[2] = sinf(src->f[2]);
+ dst->f[3] = sinf(src->f[3]);
+}
+
+static void
+micro_sle(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_slt(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_sne(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f;
+}
+
+static void
+micro_trunc(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = (float)(int)src->f[0];
+ dst->f[1] = (float)(int)src->f[1];
+ dst->f[2] = (float)(int)src->f[2];
+ dst->f[3] = (float)(int)src->f[3];
+}
+
+
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
+enum tgsi_exec_datatype {
+ TGSI_EXEC_DATA_FLOAT,
+ TGSI_EXEC_DATA_INT,
+ TGSI_EXEC_DATA_UINT
+};
+
/*
* Shorthand locations of various utility registers (_I = Index, _C = Channel)
*/
@@ -123,23 +443,19 @@
/** The execution mask depends on the conditional mask and the loop mask */
#define UPDATE_EXEC_MASK(MACH) \
- MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
+ MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask
static const union tgsi_exec_channel ZeroVec =
{ { 0.0, 0.0, 0.0, 0.0 } };
-#ifdef DEBUG
-static void
-check_inf_or_nan(const union tgsi_exec_channel *chan)
-{
- assert(!util_is_inf_or_nan(chan->f[0]));
- assert(!util_is_inf_or_nan(chan->f[1]));
- assert(!util_is_inf_or_nan(chan->f[2]));
- assert(!util_is_inf_or_nan(chan->f[3]));
-}
-#endif
+#define CHECK_INF_OR_NAN(chan) do {\
+ assert(!util_is_inf_or_nan((chan)->f[0]));\
+ assert(!util_is_inf_or_nan((chan)->f[1]));\
+ assert(!util_is_inf_or_nan((chan)->f[2]));\
+ assert(!util_is_inf_or_nan((chan)->f[3]));\
+ } while (0)
#ifdef DEBUG
@@ -292,6 +608,14 @@ tgsi_exec_machine_bind_shader(
* sizeof(struct tgsi_full_declaration));
maxDeclarations += 10;
}
+ if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
+ unsigned reg;
+ for (reg = parse.FullToken.FullDeclaration.Range.First;
+ reg <= parse.FullToken.FullDeclaration.Range.Last;
+ ++reg) {
+ ++mach->NumOutputs;
+ }
+ }
memcpy(declarations + numDeclarations,
&parse.FullToken.FullDeclaration,
sizeof(declarations[0]));
@@ -372,6 +696,7 @@ tgsi_exec_machine_create( void )
memset(mach, 0, sizeof(*mach));
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
+ mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
/* Setup constants. */
@@ -413,18 +738,6 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
align_free(mach);
}
-
-static void
-micro_abs(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = fabsf( src->f[0] );
- dst->f[1] = fabsf( src->f[1] );
- dst->f[2] = fabsf( src->f[2] );
- dst->f[3] = fabsf( src->f[3] );
-}
-
static void
micro_add(
union tgsi_exec_channel *dst,
@@ -437,76 +750,6 @@ micro_add(
dst->f[3] = src0->f[3] + src1->f[3];
}
-#if 0
-static void
-micro_iadd(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] + src1->i[0];
- dst->i[1] = src0->i[1] + src1->i[1];
- dst->i[2] = src0->i[2] + src1->i[2];
- dst->i[3] = src0->i[3] + src1->i[3];
-}
-#endif
-
-static void
-micro_and(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] & src1->u[0];
- dst->u[1] = src0->u[1] & src1->u[1];
- dst->u[2] = src0->u[2] & src1->u[2];
- dst->u[3] = src0->u[3] & src1->u[3];
-}
-
-static void
-micro_ceil(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = ceilf( src->f[0] );
- dst->f[1] = ceilf( src->f[1] );
- dst->f[2] = ceilf( src->f[2] );
- dst->f[3] = ceilf( src->f[3] );
-}
-
-static void
-micro_cos(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = cosf( src->f[0] );
- dst->f[1] = cosf( src->f[1] );
- dst->f[2] = cosf( src->f[2] );
- dst->f[3] = cosf( src->f[3] );
-}
-
-static void
-micro_ddx(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] =
- dst->f[1] =
- dst->f[2] =
- dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
-}
-
-static void
-micro_ddy(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] =
- dst->f[1] =
- dst->f[2] =
- dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
-}
-
static void
micro_div(
union tgsi_exec_channel *dst,
@@ -527,99 +770,6 @@ micro_div(
}
}
-#if 0
-static void
-micro_udiv(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] / src1->u[0];
- dst->u[1] = src0->u[1] / src1->u[1];
- dst->u[2] = src0->u[2] / src1->u[2];
- dst->u[3] = src0->u[3] / src1->u[3];
-}
-#endif
-
-static void
-micro_eq(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-#if 0
-static void
-micro_ieq(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
- dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
- dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
- dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
-}
-#endif
-
-static void
-micro_exp2(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
-{
-#if FAST_MATH
- dst->f[0] = util_fast_exp2( src->f[0] );
- dst->f[1] = util_fast_exp2( src->f[1] );
- dst->f[2] = util_fast_exp2( src->f[2] );
- dst->f[3] = util_fast_exp2( src->f[3] );
-#else
-
-#if DEBUG
- /* Inf is okay for this instruction, so clamp it to silence assertions. */
- uint i;
- union tgsi_exec_channel clamped;
-
- for (i = 0; i < 4; i++) {
- if (src->f[i] > 127.99999f) {
- clamped.f[i] = 127.99999f;
- } else if (src->f[i] < -126.99999f) {
- clamped.f[i] = -126.99999f;
- } else {
- clamped.f[i] = src->f[i];
- }
- }
- src = &clamped;
-#endif
-
- dst->f[0] = powf( 2.0f, src->f[0] );
- dst->f[1] = powf( 2.0f, src->f[1] );
- dst->f[2] = powf( 2.0f, src->f[2] );
- dst->f[3] = powf( 2.0f, src->f[3] );
-#endif
-}
-
-#if 0
-static void
-micro_f2ut(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->u[0] = (uint) src->f[0];
- dst->u[1] = (uint) src->f[1];
- dst->u[2] = (uint) src->f[2];
- dst->u[3] = (uint) src->f[3];
-}
-#endif
-
static void
micro_float_clamp(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
@@ -647,71 +797,6 @@ micro_float_clamp(union tgsi_exec_channel *dst,
}
static void
-micro_flr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = floorf( src->f[0] );
- dst->f[1] = floorf( src->f[1] );
- dst->f[2] = floorf( src->f[2] );
- dst->f[3] = floorf( src->f[3] );
-}
-
-static void
-micro_frc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = src->f[0] - floorf( src->f[0] );
- dst->f[1] = src->f[1] - floorf( src->f[1] );
- dst->f[2] = src->f[2] - floorf( src->f[2] );
- dst->f[3] = src->f[3] - floorf( src->f[3] );
-}
-
-static void
-micro_i2f(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) src->i[0];
- dst->f[1] = (float) src->i[1];
- dst->f[2] = (float) src->i[2];
- dst->f[3] = (float) src->i[3];
-}
-
-static void
-micro_lg2(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
-#if FAST_MATH
- dst->f[0] = util_fast_log2( src->f[0] );
- dst->f[1] = util_fast_log2( src->f[1] );
- dst->f[2] = util_fast_log2( src->f[2] );
- dst->f[3] = util_fast_log2( src->f[3] );
-#else
- dst->f[0] = logf( src->f[0] ) * 1.442695f;
- dst->f[1] = logf( src->f[1] ) * 1.442695f;
- dst->f[2] = logf( src->f[2] ) * 1.442695f;
- dst->f[3] = logf( src->f[3] ) * 1.442695f;
-#endif
-}
-
-static void
-micro_le(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
micro_lt(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src0,
@@ -725,38 +810,6 @@ micro_lt(
dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
}
-#if 0
-static void
-micro_ilt(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
- dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
- dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
- dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
-}
-#endif
-
-#if 0
-static void
-micro_ult(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
- dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
- dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
- dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
-}
-#endif
-
static void
micro_max(
union tgsi_exec_channel *dst,
@@ -769,34 +822,6 @@ micro_max(
dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
}
-#if 0
-static void
-micro_imax(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
- dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
- dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
- dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
-}
-#endif
-
-#if 0
-static void
-micro_umax(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
- dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
- dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
- dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
-}
-#endif
-
static void
micro_min(
union tgsi_exec_channel *dst,
@@ -809,48 +834,6 @@ micro_min(
dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
}
-#if 0
-static void
-micro_imin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
- dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
- dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
- dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
-}
-#endif
-
-#if 0
-static void
-micro_umin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
- dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
- dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
- dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
-}
-#endif
-
-#if 0
-static void
-micro_umod(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] % src1->u[0];
- dst->u[1] = src0->u[1] % src1->u[1];
- dst->u[2] = src0->u[2] % src1->u[2];
- dst->u[3] = src0->u[3] % src1->u[3];
-}
-#endif
-
static void
micro_mul(
union tgsi_exec_channel *dst,
@@ -865,20 +848,6 @@ micro_mul(
#if 0
static void
-micro_imul(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] * src1->i[0];
- dst->i[1] = src0->i[1] * src1->i[1];
- dst->i[2] = src0->i[2] * src1->i[2];
- dst->i[3] = src0->i[3] * src1->i[3];
-}
-#endif
-
-#if 0
-static void
micro_imul64(
union tgsi_exec_channel *dst0,
union tgsi_exec_channel *dst1,
@@ -942,42 +911,6 @@ micro_neg(
dst->f[3] = -src->f[3];
}
-#if 0
-static void
-micro_ineg(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->i[0] = -src->i[0];
- dst->i[1] = -src->i[1];
- dst->i[2] = -src->i[2];
- dst->i[3] = -src->i[3];
-}
-#endif
-
-static void
-micro_not(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->u[0] = ~src->u[0];
- dst->u[1] = ~src->u[1];
- dst->u[2] = ~src->u[2];
- dst->u[3] = ~src->u[3];
-}
-
-static void
-micro_or(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] | src1->u[0];
- dst->u[1] = src0->u[1] | src1->u[1];
- dst->u[2] = src0->u[2] | src1->u[2];
- dst->u[3] = src0->u[3] | src1->u[3];
-}
-
static void
micro_pow(
union tgsi_exec_channel *dst,
@@ -998,88 +931,6 @@ micro_pow(
}
static void
-micro_rnd(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = floorf( src->f[0] + 0.5f );
- dst->f[1] = floorf( src->f[1] + 0.5f );
- dst->f[2] = floorf( src->f[2] + 0.5f );
- dst->f[3] = floorf( src->f[3] + 0.5f );
-}
-
-static void
-micro_sgn(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
- dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
- dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
- dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
-}
-
-static void
-micro_shl(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] << src1->i[0];
- dst->i[1] = src0->i[1] << src1->i[1];
- dst->i[2] = src0->i[2] << src1->i[2];
- dst->i[3] = src0->i[3] << src1->i[3];
-}
-
-static void
-micro_ishr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] >> src1->i[0];
- dst->i[1] = src0->i[1] >> src1->i[1];
- dst->i[2] = src0->i[2] >> src1->i[2];
- dst->i[3] = src0->i[3] >> src1->i[3];
-}
-
-static void
-micro_trunc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0 )
-{
- dst->f[0] = (float) (int) src0->f[0];
- dst->f[1] = (float) (int) src0->f[1];
- dst->f[2] = (float) (int) src0->f[2];
- dst->f[3] = (float) (int) src0->f[3];
-}
-
-#if 0
-static void
-micro_ushr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] >> src1->u[0];
- dst->u[1] = src0->u[1] >> src1->u[1];
- dst->u[2] = src0->u[2] >> src1->u[2];
- dst->u[3] = src0->u[3] >> src1->u[3];
-}
-#endif
-
-static void
-micro_sin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = sinf( src->f[0] );
- dst->f[1] = sinf( src->f[1] );
- dst->f[2] = sinf( src->f[2] );
- dst->f[3] = sinf( src->f[3] );
-}
-
-static void
micro_sqrt( union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
@@ -1101,31 +952,6 @@ micro_sub(
dst->f[3] = src0->f[3] - src1->f[3];
}
-#if 0
-static void
-micro_u2f(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) src->u[0];
- dst->f[1] = (float) src->u[1];
- dst->f[2] = (float) src->u[2];
- dst->f[3] = (float) src->u[3];
-}
-#endif
-
-static void
-micro_xor(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] ^ src1->u[0];
- dst->u[1] = src0->u[1] ^ src1->u[1];
- dst->u[2] = src0->u[2] ^ src1->u[2];
- dst->u[3] = src0->u[3] ^ src1->u[3];
-}
-
static void
fetch_src_file_channel(
const struct tgsi_exec_machine *mach,
@@ -1224,11 +1050,11 @@ fetch_src_file_channel(
}
static void
-fetch_source(
- const struct tgsi_exec_machine *mach,
- union tgsi_exec_channel *chan,
- const struct tgsi_full_src_register *reg,
- const uint chan_index )
+fetch_source(const struct tgsi_exec_machine *mach,
+ union tgsi_exec_channel *chan,
+ const struct tgsi_full_src_register *reg,
+ const uint chan_index,
+ enum tgsi_exec_datatype src_datatype)
{
union tgsi_exec_channel index;
uint swizzle;
@@ -1277,10 +1103,10 @@ fetch_source(
&indir_index );
/* add value of address register to the offset */
- index.i[0] += (int) indir_index.f[0];
- index.i[1] += (int) indir_index.f[1];
- index.i[2] += (int) indir_index.f[2];
- index.i[3] += (int) indir_index.f[3];
+ index.i[0] += indir_index.i[0];
+ index.i[1] += indir_index.i[1];
+ index.i[2] += indir_index.i[2];
+ index.i[3] += indir_index.i[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
@@ -1357,10 +1183,10 @@ fetch_source(
&index2,
&indir_index );
- index.i[0] += (int) indir_index.f[0];
- index.i[1] += (int) indir_index.f[1];
- index.i[2] += (int) indir_index.f[2];
- index.i[3] += (int) indir_index.f[3];
+ index.i[0] += indir_index.i[0];
+ index.i[1] += indir_index.i[1];
+ index.i[2] += indir_index.i[2];
+ index.i[3] += indir_index.i[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
@@ -1385,32 +1211,30 @@ fetch_source(
&index,
chan );
- switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
- case TGSI_UTIL_SIGN_CLEAR:
- micro_abs( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_SET:
- micro_abs( chan, chan );
- micro_neg( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- micro_neg( chan, chan );
- break;
+ if (reg->Register.Absolute) {
+ if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
+ micro_abs(chan, chan);
+ } else {
+ micro_iabs(chan, chan);
+ }
+ }
- case TGSI_UTIL_SIGN_KEEP:
- break;
+ if (reg->Register.Negate) {
+ if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
+ micro_neg(chan, chan);
+ } else {
+ micro_ineg(chan, chan);
+ }
}
}
static void
-store_dest(
- struct tgsi_exec_machine *mach,
- const union tgsi_exec_channel *chan,
- const struct tgsi_full_dst_register *reg,
- const struct tgsi_full_instruction *inst,
- uint chan_index )
+store_dest(struct tgsi_exec_machine *mach,
+ const union tgsi_exec_channel *chan,
+ const struct tgsi_full_dst_register *reg,
+ const struct tgsi_full_instruction *inst,
+ uint chan_index,
+ enum tgsi_exec_datatype dst_datatype)
{
uint i;
union tgsi_exec_channel null;
@@ -1419,9 +1243,9 @@ store_dest(
int offset = 0; /* indirection offset */
int index;
-#ifdef DEBUG
- check_inf_or_nan(chan);
-#endif
+ if (dst_datatype == TGSI_EXEC_DATA_FLOAT) {
+ CHECK_INF_OR_NAN(chan);
+ }
/* There is an extra source register that indirectly subscripts
* a register file. The direct index now becomes an offset
@@ -1456,7 +1280,7 @@ store_dest(
&indir_index );
/* save indirection offset */
- offset = (int) indir_index.f[0];
+ offset = indir_index.i[0];
}
switch (reg->Register.File) {
@@ -1468,6 +1292,15 @@ store_dest(
index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+ reg->Register.Index;
dst = &mach->Outputs[offset + index].xyzw[chan_index];
+#if 0
+ if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
+ fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (execmask & (1 << i))
+ fprintf(stderr, "%f, ", chan->f[i]);
+ fprintf(stderr, ")\n");
+ }
+#endif
break;
case TGSI_FILE_TEMPORARY:
@@ -1577,10 +1410,10 @@ store_dest(
}
#define FETCH(VAL,INDEX,CHAN)\
- fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)
+ fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
#define STORE(VAL,INDEX,CHAN)\
- store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
+ store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT)
/**
@@ -1638,6 +1471,35 @@ exec_kilp(struct tgsi_exec_machine *mach,
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
}
+static void
+emit_vertex(struct tgsi_exec_machine *mach)
+{
+ /* FIXME: check for exec mask correctly
+ unsigned i;
+ for (i = 0; i < QUAD_SIZE; ++i) {
+ if ((mach->ExecMask & (1 << i)))
+ */
+ if (mach->ExecMask) {
+ mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
+ mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+ }
+}
+
+static void
+emit_primitive(struct tgsi_exec_machine *mach)
+{
+ unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
+ /* FIXME: check for exec mask correctly
+ unsigned i;
+ for (i = 0; i < QUAD_SIZE; ++i) {
+ if ((mach->ExecMask & (1 << i)))
+ */
+ if (mach->ExecMask) {
+ ++(*prim_count);
+ debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
+ mach->Primitives[*prim_count] = 0;
+ }
+}
/*
* Fetch a four texture samples using STR texture coordinates.
@@ -1908,7 +1770,7 @@ exec_declaration(struct tgsi_exec_machine *mach,
if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
assert(decl->Semantic.Index == 0);
assert(first == last);
- assert(mask = TGSI_WRITEMASK_XYZW);
+ assert(mask == TGSI_WRITEMASK_XYZW);
mach->Inputs[first] = mach->QuadPos;
} else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
@@ -1954,6 +1816,461 @@ exec_declaration(struct tgsi_exec_machine *mach,
}
}
+typedef void (* micro_op)(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src);
+
+static void
+exec_scalar_unary(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_op op,
+ enum tgsi_exec_datatype dst_datatype,
+ enum tgsi_exec_datatype src_datatype)
+{
+ unsigned int chan;
+ union tgsi_exec_channel src;
+ union tgsi_exec_channel dst;
+
+ fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype);
+ op(&dst, &src);
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
+static void
+exec_vector_unary(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_op op,
+ enum tgsi_exec_datatype dst_datatype,
+ enum tgsi_exec_datatype src_datatype)
+{
+ unsigned int chan;
+ struct tgsi_exec_vector dst;
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ union tgsi_exec_channel src;
+
+ fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
+ op(&dst.xyzw[chan], &src);
+ }
+ }
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
+static void
+exec_vector_binary(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_op op,
+ enum tgsi_exec_datatype dst_datatype,
+ enum tgsi_exec_datatype src_datatype)
+{
+ unsigned int chan;
+ struct tgsi_exec_vector dst;
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ union tgsi_exec_channel src[2];
+
+ fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
+ fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
+ op(&dst.xyzw[chan], src);
+ }
+ }
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
+static void
+exec_vector_trinary(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_op op,
+ enum tgsi_exec_datatype dst_datatype,
+ enum tgsi_exec_datatype src_datatype)
+{
+ unsigned int chan;
+ struct tgsi_exec_vector dst;
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ union tgsi_exec_channel src[3];
+
+ fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
+ fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
+ fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
+ op(&dst.xyzw[chan], src);
+ }
+ }
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
+static void
+exec_break(struct tgsi_exec_machine *mach)
+{
+ if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
+ /* turn off loop channels for each enabled exec channel */
+ mach->LoopMask &= ~mach->ExecMask;
+ /* Todo: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
+ } else {
+ assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
+
+ mach->Switch.mask = 0x0;
+
+ UPDATE_EXEC_MASK(mach);
+ }
+}
+
+static void
+exec_switch(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
+ assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
+
+ mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
+ fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
+ mach->Switch.mask = 0x0;
+ mach->Switch.defaultMask = 0x0;
+
+ mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
+ mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
+
+ UPDATE_EXEC_MASK(mach);
+}
+
+static void
+exec_case(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
+ union tgsi_exec_channel src;
+ uint mask = 0;
+
+ fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
+
+ if (mach->Switch.selector.u[0] == src.u[0]) {
+ mask |= 0x1;
+ }
+ if (mach->Switch.selector.u[1] == src.u[1]) {
+ mask |= 0x2;
+ }
+ if (mach->Switch.selector.u[2] == src.u[2]) {
+ mask |= 0x4;
+ }
+ if (mach->Switch.selector.u[3] == src.u[3]) {
+ mask |= 0x8;
+ }
+
+ mach->Switch.defaultMask |= mask;
+
+ mach->Switch.mask |= mask & prevMask;
+
+ UPDATE_EXEC_MASK(mach);
+}
+
+static void
+exec_default(struct tgsi_exec_machine *mach)
+{
+ uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
+
+ mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
+
+ UPDATE_EXEC_MASK(mach);
+}
+
+static void
+exec_endswitch(struct tgsi_exec_machine *mach)
+{
+ mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
+ mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
+
+ UPDATE_EXEC_MASK(mach);
+}
+
+static void
+micro_i2f(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = (float)src->i[0];
+ dst->f[1] = (float)src->i[1];
+ dst->f[2] = (float)src->i[2];
+ dst->f[3] = (float)src->i[3];
+}
+
+static void
+micro_not(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = ~src->u[0];
+ dst->u[1] = ~src->u[1];
+ dst->u[2] = ~src->u[2];
+ dst->u[3] = ~src->u[3];
+}
+
+static void
+micro_shl(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] << src[1].u[0];
+ dst->u[1] = src[0].u[1] << src[1].u[1];
+ dst->u[2] = src[0].u[2] << src[1].u[2];
+ dst->u[3] = src[0].u[3] << src[1].u[3];
+}
+
+static void
+micro_and(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] & src[1].u[0];
+ dst->u[1] = src[0].u[1] & src[1].u[1];
+ dst->u[2] = src[0].u[2] & src[1].u[2];
+ dst->u[3] = src[0].u[3] & src[1].u[3];
+}
+
+static void
+micro_or(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] | src[1].u[0];
+ dst->u[1] = src[0].u[1] | src[1].u[1];
+ dst->u[2] = src[0].u[2] | src[1].u[2];
+ dst->u[3] = src[0].u[3] | src[1].u[3];
+}
+
+static void
+micro_xor(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] ^ src[1].u[0];
+ dst->u[1] = src[0].u[1] ^ src[1].u[1];
+ dst->u[2] = src[0].u[2] ^ src[1].u[2];
+ dst->u[3] = src[0].u[3] ^ src[1].u[3];
+}
+
+static void
+micro_f2i(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = (int)src->f[0];
+ dst->i[1] = (int)src->f[1];
+ dst->i[2] = (int)src->f[2];
+ dst->i[3] = (int)src->f[3];
+}
+
+static void
+micro_idiv(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] / src[1].i[0];
+ dst->i[1] = src[0].i[1] / src[1].i[1];
+ dst->i[2] = src[0].i[2] / src[1].i[2];
+ dst->i[3] = src[0].i[3] / src[1].i[3];
+}
+
+static void
+micro_imax(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0];
+ dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1];
+ dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2];
+ dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3];
+}
+
+static void
+micro_imin(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0];
+ dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1];
+ dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2];
+ dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3];
+}
+
+static void
+micro_isge(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0;
+ dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0;
+ dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0;
+ dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0;
+}
+
+static void
+micro_ishr(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] >> src[1].i[0];
+ dst->i[1] = src[0].i[1] >> src[1].i[1];
+ dst->i[2] = src[0].i[2] >> src[1].i[2];
+ dst->i[3] = src[0].i[3] >> src[1].i[3];
+}
+
+static void
+micro_islt(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0;
+ dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0;
+ dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0;
+ dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0;
+}
+
+static void
+micro_f2u(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = (uint)src->f[0];
+ dst->u[1] = (uint)src->f[1];
+ dst->u[2] = (uint)src->f[2];
+ dst->u[3] = (uint)src->f[3];
+}
+
+static void
+micro_u2f(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = (float)src->u[0];
+ dst->f[1] = (float)src->u[1];
+ dst->f[2] = (float)src->u[2];
+ dst->f[3] = (float)src->u[3];
+}
+
+static void
+micro_uadd(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] + src[1].u[0];
+ dst->u[1] = src[0].u[1] + src[1].u[1];
+ dst->u[2] = src[0].u[2] + src[1].u[2];
+ dst->u[3] = src[0].u[3] + src[1].u[3];
+}
+
+static void
+micro_udiv(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] / src[1].u[0];
+ dst->u[1] = src[0].u[1] / src[1].u[1];
+ dst->u[2] = src[0].u[2] / src[1].u[2];
+ dst->u[3] = src[0].u[3] / src[1].u[3];
+}
+
+static void
+micro_umad(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0];
+ dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1];
+ dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2];
+ dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3];
+}
+
+static void
+micro_umax(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0];
+ dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1];
+ dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2];
+ dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3];
+}
+
+static void
+micro_umin(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0];
+ dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1];
+ dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2];
+ dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3];
+}
+
+static void
+micro_umod(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] % src[1].u[0];
+ dst->u[1] = src[0].u[1] % src[1].u[1];
+ dst->u[2] = src[0].u[2] % src[1].u[2];
+ dst->u[3] = src[0].u[3] % src[1].u[3];
+}
+
+static void
+micro_umul(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] * src[1].u[0];
+ dst->u[1] = src[0].u[1] * src[1].u[1];
+ dst->u[2] = src[0].u[2] * src[1].u[2];
+ dst->u[3] = src[0].u[3] * src[1].u[3];
+}
+
+static void
+micro_useq(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0;
+ dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0;
+ dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0;
+ dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0;
+}
+
+static void
+micro_usge(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0;
+ dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0;
+ dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0;
+ dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0;
+}
+
+static void
+micro_ushr(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] >> src[1].u[0];
+ dst->u[1] = src[0].u[1] >> src[1].u[1];
+ dst->u[2] = src[0].u[2] >> src[1].u[2];
+ dst->u[3] = src[0].u[3] >> src[1].u[3];
+}
+
+static void
+micro_uslt(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0;
+ dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0;
+ dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0;
+ dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0;
+}
+
+static void
+micro_usne(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0;
+ dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0;
+ dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0;
+ dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0;
+}
+
static void
exec_instruction(
struct tgsi_exec_machine *mach,
@@ -1968,23 +2285,11 @@ exec_instruction(
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- case TGSI_OPCODE_FLR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_flr(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_MOV:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&d[chan_index], 0, chan_index);
- }
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_LIT:
@@ -2021,23 +2326,11 @@ exec_instruction(
break;
case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
- FETCH( &r[0], 0, CHAN_X );
- micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
- FETCH( &r[0], 0, CHAN_X );
- micro_abs( &r[0], &r[0] );
- micro_sqrt( &r[0], &r[0] );
- micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_EXP:
@@ -2208,41 +2501,15 @@ exec_instruction(
break;
case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_mul( &r[0], &r[0], &r[1] );
- FETCH( &r[1], 2, chan_index );
- micro_add(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SUB:
@@ -2257,17 +2524,7 @@ exec_instruction(
break;
case TGSI_OPCODE_LRP:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
- micro_sub( &r[1], &r[1], &r[2] );
- micro_mul( &r[0], &r[0], &r[1] );
- micro_add(&d[chan_index], &r[0], &r[2]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_CND:
@@ -2301,13 +2558,7 @@ exec_instruction(
break;
case TGSI_OPCODE_FRC:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_frc(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_CLAMP:
@@ -2323,33 +2574,20 @@ exec_instruction(
}
break;
+ case TGSI_OPCODE_FLR:
+ exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
case TGSI_OPCODE_ROUND:
- case TGSI_OPCODE_ARR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_rnd(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_EX2:
- FETCH(&r[0], 0, CHAN_X);
-
- micro_exp2( &r[0], &r[0] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_LG2:
- FETCH( &r[0], 0, CHAN_X );
- micro_lg2( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_POW:
@@ -2402,15 +2640,9 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_ABS:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- micro_abs(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
- break;
+ case TGSI_OPCODE_ABS:
+ exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
+ break;
case TGSI_OPCODE_RCC:
FETCH(&r[0], 0, CHAN_X);
@@ -2449,33 +2681,15 @@ exec_instruction(
break;
case TGSI_OPCODE_COS:
- FETCH(&r[0], 0, CHAN_X);
-
- micro_cos( &r[0], &r[0] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DDX:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ddx(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DDY:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ddy(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_KILP:
@@ -2552,14 +2766,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SEQ:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SFL:
@@ -2569,44 +2776,19 @@ exec_instruction(
break;
case TGSI_OPCODE_SGT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SIN:
- FETCH( &r[0], 0, CHAN_X );
- micro_sin( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SLE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SNE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_STR:
@@ -2711,6 +2893,10 @@ exec_instruction(
assert (0);
break;
+ case TGSI_OPCODE_ARR:
+ exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
case TGSI_OPCODE_BRA:
assert (0);
break;
@@ -2730,6 +2916,8 @@ exec_instruction(
mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
+ mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
+ mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
/* note that PC was already incremented above */
mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
@@ -2737,12 +2925,17 @@ exec_instruction(
/* Second, push the Cond, Loop, Cont, Func stacks */
assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
- mach->CondStack[mach->CondStackTop++] = mach->CondMask;
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+ assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
+ assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
+
+ mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+ mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+ mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
+ mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
/* Finally, jump to the subroutine */
@@ -2775,6 +2968,12 @@ exec_instruction(
mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
mach->ContMask = mach->ContStack[mach->ContStackTop];
+ mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
+ mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
+
+ mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
+ mach->BreakType = mach->BreakStack[mach->BreakStackTop];
+
assert(mach->FuncStackTop > 0);
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
@@ -2785,14 +2984,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SSG:
- /* TGSI_OPCODE_SGN */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_sgn(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_CMP:
@@ -2976,87 +3168,31 @@ exec_instruction(
break;
case TGSI_OPCODE_CEIL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ceil(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_I2F:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_i2f(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
break;
case TGSI_OPCODE_NOT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_not(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_TRUNC:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_trunc(&d[chan_index], &r[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SHL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_shl(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_SHR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_ishr(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_AND:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_and(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_OR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_or(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_MOD:
@@ -3064,14 +3200,7 @@ exec_instruction(
break;
case TGSI_OPCODE_XOR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_xor(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_SAD:
@@ -3087,13 +3216,11 @@ exec_instruction(
break;
case TGSI_OPCODE_EMIT:
- mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+ emit_vertex(mach);
break;
case TGSI_OPCODE_ENDPRIM:
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
+ emit_primitive(mach);
break;
case TGSI_OPCODE_BGNFOR:
@@ -3122,11 +3249,15 @@ exec_instruction(
case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
+
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+ mach->ContStack[mach->ContStackTop++] = mach->ContMask;
mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
+ mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
+ mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
break;
case TGSI_OPCODE_ENDFOR:
@@ -3173,6 +3304,8 @@ exec_instruction(
--mach->LoopLabelStackTop;
assert(mach->LoopCounterStackTop > 0);
--mach->LoopCounterStackTop;
+
+ mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
}
UPDATE_EXEC_MASK(mach);
break;
@@ -3196,15 +3329,14 @@ exec_instruction(
mach->ContMask = mach->ContStack[--mach->ContStackTop];
assert(mach->LoopLabelStackTop > 0);
--mach->LoopLabelStackTop;
+
+ mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
}
UPDATE_EXEC_MASK(mach);
break;
case TGSI_OPCODE_BRK:
- /* turn off loop channels for each enabled exec channel */
- mach->LoopMask &= ~mach->ExecMask;
- /* Todo: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
+ exec_break(mach);
break;
case TGSI_OPCODE_CONT:
@@ -3235,6 +3367,12 @@ exec_instruction(
mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
mach->ContMask = mach->ContStack[mach->ContStackTop];
+ mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
+ mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
+
+ mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
+ mach->BreakType = mach->BreakStack[mach->BreakStackTop];
+
assert(mach->FuncStackTop > 0);
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
@@ -3265,11 +3403,116 @@ exec_instruction(
UPDATE_EXEC_MASK(mach);
break;
+ case TGSI_OPCODE_F2I:
+ exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_IDIV:
+ exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_IMAX:
+ exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_IMIN:
+ exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_INEG:
+ exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_ISGE:
+ exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_ISHR:
+ exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_ISLT:
+ exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_F2U:
+ exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_U2F:
+ exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UADD:
+ exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UDIV:
+ exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMAD:
+ exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMAX:
+ exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMIN:
+ exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMOD:
+ exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_UMUL:
+ exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USEQ:
+ exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USGE:
+ exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USHR:
+ exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USLT:
+ exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_USNE:
+ exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_SWITCH:
+ exec_switch(mach, inst);
+ break;
+
+ case TGSI_OPCODE_CASE:
+ exec_case(mach, inst);
+ break;
+
+ case TGSI_OPCODE_DEFAULT:
+ exec_default(mach);
+ break;
+
+ case TGSI_OPCODE_ENDSWITCH:
+ exec_endswitch(mach);
+ break;
+
default:
assert( 0 );
}
}
+
#define DEBUG_EXECUTION 0
@@ -3289,9 +3532,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
mach->FuncMask = 0xf;
mach->ExecMask = 0xf;
+ mach->Switch.mask = 0xf;
+
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);
+ assert(mach->SwitchStackTop == 0);
+ assert(mach->BreakStackTop == 0);
assert(mach->CallStackTop == 0);
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
@@ -3348,11 +3595,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
if (j > 0) {
debug_printf(" ");
}
- debug_printf("(%6f, %6f, %6f, %6f)\n",
- temps[i].xyzw[0].f[j],
- temps[i].xyzw[1].f[j],
- temps[i].xyzw[2].f[j],
- temps[i].xyzw[3].f[j]);
+ debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
+ temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
+ temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
+ temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
+ temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
}
}
}
@@ -3366,11 +3613,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
if (j > 0) {
debug_printf(" ");
}
- debug_printf("{%6f, %6f, %6f, %6f}\n",
- outputs[i].xyzw[0].f[j],
- outputs[i].xyzw[1].f[j],
- outputs[i].xyzw[2].f[j],
- outputs[i].xyzw[3].f[j]);
+ debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
+ outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
+ outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
+ outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
+ outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
}
}
}
@@ -3392,6 +3639,8 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);
+ assert(mach->SwitchStackTop == 0);
+ assert(mach->BreakStackTop == 0);
assert(mach->CallStackTop == 0);
return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index fd94c1bc44..aa3a98d7f1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -179,6 +179,7 @@ struct tgsi_exec_labels
#define TGSI_EXEC_MAX_COND_NESTING 32
#define TGSI_EXEC_MAX_LOOP_NESTING 32
+#define TGSI_EXEC_MAX_SWITCH_NESTING 32
#define TGSI_EXEC_MAX_CALL_NESTING 32
/* The maximum number of input attributes per vertex. For 2D
@@ -191,6 +192,14 @@ struct tgsi_exec_labels
*/
#define TGSI_EXEC_MAX_CONST_BUFFER 4096
+/* The maximum number of vertices per primitive */
+#define TGSI_MAX_PRIM_VERTICES 6
+
+/* The maximum number of primitives to be generated */
+#define TGSI_MAX_PRIMITIVES 64
+
+/* The maximum total number of vertices */
+#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)
/** function call/activation record */
struct tgsi_call_record
@@ -198,10 +207,29 @@ struct tgsi_call_record
uint CondStackTop;
uint LoopStackTop;
uint ContStackTop;
+ int SwitchStackTop;
+ int BreakStackTop;
uint ReturnAddr;
};
+/* Switch-case block state. */
+struct tgsi_switch_record {
+ uint mask; /**< execution mask */
+ union tgsi_exec_channel selector; /**< a value case statements are compared to */
+ uint defaultMask; /**< non-execute mask for default case */
+};
+
+
+enum tgsi_break_type {
+ TGSI_EXEC_BREAK_INSIDE_LOOP,
+ TGSI_EXEC_BREAK_INSIDE_SWITCH
+};
+
+
+#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING)
+
+
/**
* Run-time virtual machine state for executing TGSI shader.
*/
@@ -214,8 +242,8 @@ struct tgsi_exec_machine
float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
- struct tgsi_exec_vector Inputs[PIPE_MAX_ATTRIBS];
- struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS];
+ struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS];
+ struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES];
struct tgsi_exec_vector *Addrs;
struct tgsi_exec_vector *Predicates;
@@ -229,6 +257,8 @@ struct tgsi_exec_machine
/* GEOMETRY processor only. */
unsigned *Primitives;
+ unsigned NumOutputs;
+ unsigned MaxGeometryShaderOutputs;
/* FRAGMENT processor only. */
const struct tgsi_interp_coef *InterpCoefs;
@@ -242,6 +272,12 @@ struct tgsi_exec_machine
uint FuncMask; /**< For function calls */
uint ExecMask; /**< = CondMask & LoopMask */
+ /* Current switch-case state. */
+ struct tgsi_switch_record Switch;
+
+ /* Current break type. */
+ enum tgsi_break_type BreakType;
+
/** Condition mask stack (for nested conditionals) */
uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
int CondStackTop;
@@ -262,6 +298,13 @@ struct tgsi_exec_machine
uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
int ContStackTop;
+ /** Switch case stack */
+ struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING];
+ int SwitchStackTop;
+
+ enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK];
+ int BreakStackTop;
+
/** Function execution mask stack (for executing subroutine code) */
uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
int FuncStackTop;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index be375cabb8..de0e09cdba 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -119,7 +119,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT },
{ 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC },
{ 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL },
- { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR },
+ { 0, 0, 0, 0, 0, 0, "", 88 }, /* removed */
{ 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND },
{ 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR },
{ 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD },
@@ -149,7 +149,33 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC },
{ 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL },
{ 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END },
- { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */
+ { 0, 0, 0, 0, 0, 0, "", 118 }, /* removed */
+ { 1, 1, 0, 0, 0, 0, "F2I", TGSI_OPCODE_F2I },
+ { 1, 2, 0, 0, 0, 0, "IDIV", TGSI_OPCODE_IDIV },
+ { 1, 2, 0, 0, 0, 0, "IMAX", TGSI_OPCODE_IMAX },
+ { 1, 2, 0, 0, 0, 0, "IMIN", TGSI_OPCODE_IMIN },
+ { 1, 1, 0, 0, 0, 0, "INEG", TGSI_OPCODE_INEG },
+ { 1, 2, 0, 0, 0, 0, "ISGE", TGSI_OPCODE_ISGE },
+ { 1, 2, 0, 0, 0, 0, "ISHR", TGSI_OPCODE_ISHR },
+ { 1, 2, 0, 0, 0, 0, "ISLT", TGSI_OPCODE_ISLT },
+ { 1, 1, 0, 0, 0, 0, "F2U", TGSI_OPCODE_F2U },
+ { 1, 1, 0, 0, 0, 0, "U2F", TGSI_OPCODE_U2F },
+ { 1, 2, 0, 0, 0, 0, "UADD", TGSI_OPCODE_UADD },
+ { 1, 2, 0, 0, 0, 0, "UDIV", TGSI_OPCODE_UDIV },
+ { 1, 3, 0, 0, 0, 0, "UMAD", TGSI_OPCODE_UMAD },
+ { 1, 2, 0, 0, 0, 0, "UMAX", TGSI_OPCODE_UMAX },
+ { 1, 2, 0, 0, 0, 0, "UMIN", TGSI_OPCODE_UMIN },
+ { 1, 2, 0, 0, 0, 0, "UMOD", TGSI_OPCODE_UMOD },
+ { 1, 2, 0, 0, 0, 0, "UMUL", TGSI_OPCODE_UMUL },
+ { 1, 2, 0, 0, 0, 0, "USEQ", TGSI_OPCODE_USEQ },
+ { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE },
+ { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR },
+ { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT },
+ { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE },
+ { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH },
+ { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE },
+ { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT },
+ { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }
};
const struct tgsi_opcode_info *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index b34263da48..e4af15c156 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -124,7 +124,6 @@ OP11(I2F)
OP11(NOT)
OP11(TRUNC)
OP12(SHL)
-OP12(SHR)
OP12(AND)
OP12(OR)
OP12(MOD)
@@ -146,6 +145,28 @@ OP01(IFC)
OP01(BREAKC)
OP01(KIL)
OP00(END)
+OP11(F2I)
+OP12(IDIV)
+OP12(IMAX)
+OP12(IMIN)
+OP11(INEG)
+OP12(ISGE)
+OP12(ISHR)
+OP12(ISLT)
+OP11(F2U)
+OP11(U2F)
+OP12(UADD)
+OP12(UDIV)
+OP13(UMAD)
+OP12(UMAX)
+OP12(UMIN)
+OP12(UMOD)
+OP12(UMUL)
+OP12(USEQ)
+OP12(USGE)
+OP12(USHR)
+OP12(USLT)
+OP12(USNE)
#undef OP00
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index fa65ecb997..8c7062d850 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -119,17 +119,29 @@ tgsi_parse_token(
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
+ uint imm_count;
memset(imm, 0, sizeof *imm);
copy_token(&imm->Immediate, &token);
+ imm_count = imm->Immediate.NrTokens - 1;
+
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
- {
- uint imm_count = imm->Immediate.NrTokens - 1;
- for (i = 0; i < imm_count; i++) {
- next_token(ctx, &imm->u[i]);
- }
+ for (i = 0; i < imm_count; i++) {
+ next_token(ctx, &imm->u[i].Float);
+ }
+ break;
+
+ case TGSI_IMM_UINT32:
+ for (i = 0; i < imm_count; i++) {
+ next_token(ctx, &imm->u[i].Uint);
+ }
+ break;
+
+ case TGSI_IMM_INT32:
+ for (i = 0; i < imm_count; i++) {
+ next_token(ctx, &imm->u[i].Int);
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index c27579e794..9b0644465a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -26,32 +26,112 @@
**************************************************************************/
#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+#include "cso_cache/cso_hash.h"
#include "tgsi_sanity.h"
#include "tgsi_info.h"
#include "tgsi_iterate.h"
-typedef uint reg_flag;
-
-#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8)
-
-#define MAX_REGISTERS 1024
-#define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG)
+typedef struct {
+ uint file : 28;
+ /* max 2 dimensions */
+ uint dimensions : 4;
+ uint indices[2];
+} scan_register;
struct sanity_check_ctx
{
struct tgsi_iterate_context iter;
+ struct cso_hash *regs_decl;
+ struct cso_hash *regs_used;
+ struct cso_hash *regs_ind_used;
- reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REG_FLAGS];
- reg_flag regs_used[TGSI_FILE_COUNT][MAX_REG_FLAGS];
- boolean regs_ind_used[TGSI_FILE_COUNT];
uint num_imms;
uint num_instructions;
uint index_of_END;
uint errors;
uint warnings;
+ uint implied_array_size;
};
+static INLINE unsigned
+scan_register_key(const scan_register *reg)
+{
+ unsigned key = reg->file;
+ key |= (reg->indices[0] << 4);
+ key |= (reg->indices[1] << 18);
+
+ return key;
+}
+
+static void
+fill_scan_register1d(scan_register *reg,
+ uint file, uint index)
+{
+ reg->file = file;
+ reg->dimensions = 1;
+ reg->indices[0] = index;
+ reg->indices[1] = 0;
+}
+
+static void
+fill_scan_register2d(scan_register *reg,
+ uint file, uint index1, uint index2)
+{
+ reg->file = file;
+ reg->dimensions = 2;
+ reg->indices[0] = index1;
+ reg->indices[1] = index2;
+}
+
+static void
+scan_register_dst(scan_register *reg,
+ struct tgsi_full_dst_register *dst)
+{
+ fill_scan_register1d(reg,
+ dst->Register.File,
+ dst->Register.Index);
+}
+
+static void
+scan_register_src(scan_register *reg,
+ struct tgsi_full_src_register *src)
+{
+ if (src->Register.Dimension) {
+ /*FIXME: right now we don't support indirect
+ * multidimensional addressing */
+ debug_assert(!src->Dimension.Indirect);
+ fill_scan_register2d(reg,
+ src->Register.File,
+ src->Register.Index,
+ src->Dimension.Index);
+ } else {
+ fill_scan_register1d(reg,
+ src->Register.File,
+ src->Register.Index);
+ }
+}
+
+static scan_register *
+create_scan_register_src(struct tgsi_full_src_register *src)
+{
+ scan_register *reg = MALLOC(sizeof(scan_register));
+ scan_register_src(reg, src);
+
+ return reg;
+}
+
+static scan_register *
+create_scan_register_dst(struct tgsi_full_dst_register *dst)
+{
+ scan_register *reg = MALLOC(sizeof(scan_register));
+ scan_register_dst(reg, dst);
+
+ return reg;
+}
+
static void
report_error(
struct sanity_check_ctx *ctx,
@@ -99,12 +179,12 @@ check_file_name(
static boolean
is_register_declared(
struct sanity_check_ctx *ctx,
- uint file,
- int index )
+ const scan_register *reg)
{
- assert( index >= 0 && index < MAX_REGISTERS );
-
- return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
+ void *data = cso_hash_find_data_from_template(
+ ctx->regs_decl, scan_register_key(reg),
+ (void*)reg, sizeof(scan_register));
+ return data ? TRUE : FALSE;
}
static boolean
@@ -112,23 +192,37 @@ is_any_register_declared(
struct sanity_check_ctx *ctx,
uint file )
{
- uint i;
+ struct cso_hash_iter iter =
+ cso_hash_first_node(ctx->regs_decl);
- for (i = 0; i < MAX_REG_FLAGS; i++)
- if (ctx->regs_decl[file][i])
+ while (!cso_hash_iter_is_null(iter)) {
+ scan_register *reg = (scan_register *)cso_hash_iter_data(iter);
+ if (reg->file == file)
return TRUE;
+ iter = cso_hash_iter_next(iter);
+ }
+
return FALSE;
}
static boolean
is_register_used(
struct sanity_check_ctx *ctx,
- uint file,
- int index )
+ scan_register *reg)
{
- assert( index < MAX_REGISTERS );
+ void *data = cso_hash_find_data_from_template(
+ ctx->regs_used, scan_register_key(reg),
+ reg, sizeof(scan_register));
+ return data ? TRUE : FALSE;
+}
- return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
+
+static boolean
+is_ind_register_used(
+ struct sanity_check_ctx *ctx,
+ scan_register *reg)
+{
+ return cso_hash_contains(ctx->regs_ind_used, reg->file);
}
static const char *file_names[TGSI_FILE_COUNT] =
@@ -148,31 +242,40 @@ static const char *file_names[TGSI_FILE_COUNT] =
static boolean
check_register_usage(
struct sanity_check_ctx *ctx,
- uint file,
- int index,
+ scan_register *reg,
const char *name,
boolean indirect_access )
{
- if (!check_file_name( ctx, file ))
+ if (!check_file_name( ctx, reg->file )) {
+ FREE(reg);
return FALSE;
+ }
if (indirect_access) {
/* Note that 'index' is an offset relative to the value of the
- * address register. No range checking done here.
- */
- if (!is_any_register_declared( ctx, file ))
- report_error( ctx, "%s: Undeclared %s register", file_names[file], name );
- ctx->regs_ind_used[file] = TRUE;
+ * address register. No range checking done here.*/
+ reg->indices[0] = 0;
+ reg->indices[1] = 0;
+ if (!is_any_register_declared( ctx, reg->file ))
+ report_error( ctx, "%s: Undeclared %s register", file_names[reg->file], name );
+ if (!is_ind_register_used(ctx, reg))
+ cso_hash_insert(ctx->regs_ind_used, reg->file, reg);
+ else
+ FREE(reg);
}
else {
- if (index < 0 || index >= MAX_REGISTERS) {
- report_error( ctx, "%s[%d]: Invalid %s index", file_names[file], index, name );
- return FALSE;
- }
-
- if (!is_register_declared( ctx, file, index ))
- report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name );
- ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
+ if (!is_register_declared( ctx, reg )) {
+ if (reg->dimensions == 2)
+ report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file],
+ reg->indices[0], reg->indices[1], name );
+ else
+ report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file],
+ reg->indices[0], name );
+ }
+ if (!is_register_used( ctx, reg ))
+ cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg);
+ else
+ FREE(reg);
}
return TRUE;
}
@@ -210,33 +313,33 @@ iter_instruction(
* Mark the registers as used.
*/
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+ scan_register *reg = create_scan_register_dst(&inst->Dst[i]);
check_register_usage(
ctx,
- inst->Dst[i].Register.File,
- inst->Dst[i].Register.Index,
+ reg,
"destination",
FALSE );
}
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ scan_register *reg = create_scan_register_src(&inst->Src[i]);
check_register_usage(
ctx,
- inst->Src[i].Register.File,
- inst->Src[i].Register.Index,
+ reg,
"source",
(boolean)inst->Src[i].Register.Indirect );
if (inst->Src[i].Register.Indirect) {
- uint file;
- int index;
+ scan_register *ind_reg = MALLOC(sizeof(scan_register));
- file = inst->Src[i].Indirect.File;
- index = inst->Src[i].Indirect.Index;
+ fill_scan_register1d(ind_reg,
+ inst->Src[i].Indirect.File,
+ inst->Src[i].Indirect.Index);
check_register_usage(
ctx,
- file,
- index,
+ reg,
"indirect",
FALSE );
- if (!(file == TGSI_FILE_ADDRESS || file == TGSI_FILE_LOOP) || index != 0) {
+ if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) ||
+ reg->indices[0] != 0) {
report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]");
}
}
@@ -266,6 +369,19 @@ iter_instruction(
return TRUE;
}
+static void
+check_and_declare(struct sanity_check_ctx *ctx,
+ scan_register *reg)
+{
+ if (is_register_declared( ctx, reg))
+ report_error( ctx, "%s[%u]: The same register declared more than once",
+ file_names[reg->file], reg->indices[0] );
+ cso_hash_insert(ctx->regs_decl,
+ scan_register_key(reg),
+ reg);
+}
+
+
static boolean
iter_declaration(
struct tgsi_iterate_context *iter,
@@ -287,9 +403,21 @@ iter_declaration(
if (!check_file_name( ctx, file ))
return TRUE;
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
- if (is_register_declared( ctx, file, i ))
- report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i );
- ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG));
+ /* declared TGSI_FILE_INPUT's for geometry processor
+ * have an implied second dimension */
+ if (file == TGSI_FILE_INPUT &&
+ ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) {
+ uint vert;
+ for (vert = 0; vert < ctx->implied_array_size; ++vert) {
+ scan_register *reg = MALLOC(sizeof(scan_register));
+ fill_scan_register2d(reg, file, vert, i);
+ check_and_declare(ctx, reg);
+ }
+ } else {
+ scan_register *reg = MALLOC(sizeof(scan_register));
+ fill_scan_register1d(reg, file, i);
+ check_and_declare(ctx, reg);
+ }
}
return TRUE;
@@ -301,8 +429,7 @@ iter_immediate(
struct tgsi_full_immediate *imm )
{
struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
-
- assert( ctx->num_imms < MAX_REGISTERS );
+ scan_register *reg;
/* No immediates allowed after the first instruction.
*/
@@ -311,12 +438,16 @@ iter_immediate(
/* Mark the register as declared.
*/
- ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG));
+ reg = MALLOC(sizeof(scan_register));
+ fill_scan_register1d(reg, TGSI_FILE_IMMEDIATE, ctx->num_imms);
+ cso_hash_insert(ctx->regs_decl, scan_register_key(reg), reg);
ctx->num_imms++;
/* Check data type validity.
*/
- if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) {
+ if (imm->Immediate.DataType != TGSI_IMM_FLOAT32 &&
+ imm->Immediate.DataType != TGSI_IMM_UINT32 &&
+ imm->Immediate.DataType != TGSI_IMM_INT32) {
report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType );
return TRUE;
}
@@ -330,8 +461,12 @@ iter_property(
struct tgsi_iterate_context *iter,
struct tgsi_full_property *prop )
{
- /*struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;*/
+ struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
+ if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY &&
+ prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) {
+ ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data);
+ }
return TRUE;
}
@@ -340,7 +475,6 @@ epilog(
struct tgsi_iterate_context *iter )
{
struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
- uint file;
/* There must be an END instruction somewhere.
*/
@@ -350,13 +484,17 @@ epilog(
/* Check if all declared registers were used.
*/
- for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) {
- uint i;
-
- for (i = 0; i < MAX_REGISTERS; i++) {
- if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) {
- report_warning( ctx, "%s[%u]: Register never used", file_names[file], i );
+ {
+ struct cso_hash_iter iter =
+ cso_hash_first_node(ctx->regs_decl);
+
+ while (!cso_hash_iter_is_null(iter)) {
+ scan_register *reg = (scan_register *)cso_hash_iter_data(iter);
+ if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) {
+ report_warning( ctx, "%s[%u]: Register never used",
+ file_names[reg->file], reg->indices[0] );
}
+ iter = cso_hash_iter_next(iter);
}
}
@@ -368,6 +506,18 @@ epilog(
return TRUE;
}
+static void
+regs_hash_destroy(struct cso_hash *hash)
+{
+ struct cso_hash_iter iter = cso_hash_first_node(hash);
+ while (!cso_hash_iter_is_null(iter)) {
+ scan_register *reg = (scan_register *)cso_hash_iter_data(iter);
+ iter = cso_hash_erase(hash, iter);
+ FREE(reg);
+ }
+ cso_hash_delete(hash);
+}
+
boolean
tgsi_sanity_check(
const struct tgsi_token *tokens )
@@ -381,18 +531,23 @@ tgsi_sanity_check(
ctx.iter.iterate_property = iter_property;
ctx.iter.epilog = epilog;
- memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) );
- memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) );
- memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) );
+ ctx.regs_decl = cso_hash_create();
+ ctx.regs_used = cso_hash_create();
+ ctx.regs_ind_used = cso_hash_create();
+
ctx.num_imms = 0;
ctx.num_instructions = 0;
ctx.index_of_END = ~0;
ctx.errors = 0;
ctx.warnings = 0;
+ ctx.implied_array_size = 0;
if (!tgsi_iterate_shader( tokens, &ctx.iter ))
return FALSE;
+ regs_hash_destroy(ctx.regs_decl);
+ regs_hash_destroy(ctx.regs_used);
+ regs_hash_destroy(ctx.regs_ind_used);
return ctx.errors == 0;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 0f48b0dc3a..a6cc773003 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -229,8 +229,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
/* Do a whole bunch of checks for a simple move */
if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
- src->Register.File != TGSI_FILE_INPUT ||
- src->Register.File != TGSI_FILE_SYSTEM_VALUE ||
+ (src->Register.File != TGSI_FILE_INPUT &&
+ src->Register.File != TGSI_FILE_SYSTEM_VALUE) ||
dst->Register.File != TGSI_FILE_OUTPUT ||
src->Register.Index != dst->Register.Index ||
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index d63c75dafb..118059ace9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2578,7 +2578,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
return 0;
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index f000958bfc..9fcffeda36 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -27,7 +27,9 @@
#include "util/u_debug.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
#include "tgsi_text.h"
#include "tgsi_build.h"
#include "tgsi_info.h"
@@ -61,18 +63,20 @@ static boolean uprcase( char c )
}
/*
- * Ignore case of str1 and assume str2 is already uppercase.
+ * Ignore case of str1 and assume str1 is already uppercase.
* Return TRUE iff str1 and str2 are equal.
*/
static int
streq_nocase_uprcase(const char *str1,
const char *str2)
{
- while (*str1 && uprcase(*str1) == *str2) {
+ while (*str1 && *str2) {
+ if (*str1 != uprcase(*str2))
+ return FALSE;
str1++;
str2++;
}
- return *str1 == *str2;
+ return TRUE;
}
static boolean str_match_no_case( const char **pcur, const char *str )
@@ -193,11 +197,26 @@ struct translate_ctx
struct tgsi_token *tokens_cur;
struct tgsi_token *tokens_end;
struct tgsi_header *header;
+ unsigned processor : 4;
+ int implied_array_size : 5;
};
static void report_error( struct translate_ctx *ctx, const char *msg )
{
- debug_printf( "\nError: %s", msg );
+ int line = 1;
+ int column = 1;
+ const char *itr = ctx->text;
+
+ while (itr != ctx->cur) {
+ if (*itr == '\n') {
+ column = 1;
+ ++line;
+ }
+ ++column;
+ ++itr;
+ }
+
+ debug_printf( "\nTGSI asm error: %s [%d : %d] \n", msg, line, column );
}
/* Parse shader header.
@@ -229,6 +248,7 @@ static boolean parse_header( struct translate_ctx *ctx )
if (ctx->tokens_cur >= ctx->tokens_end)
return FALSE;
*(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header );
+ ctx->processor = processor;
return TRUE;
}
@@ -325,92 +345,36 @@ parse_opt_writemask(
return TRUE;
}
-/* <register_file_bracket> ::= <file> `['
- */
static boolean
-parse_register_file_bracket(
- struct translate_ctx *ctx,
- uint *file )
-{
- if (!parse_file( &ctx->cur, file )) {
- report_error( ctx, "Unknown register file" );
- return FALSE;
- }
- eat_opt_white( &ctx->cur );
- if (*ctx->cur != '[') {
- report_error( ctx, "Expected `['" );
- return FALSE;
- }
- ctx->cur++;
- return TRUE;
-}
+parse_register_dst( struct translate_ctx *ctx,
+ uint *file,
+ int *index );
-/* <register_file_bracket_index> ::= <register_file_bracket> <uint>
- */
-static boolean
-parse_register_file_bracket_index(
- struct translate_ctx *ctx,
- uint *file,
- int *index )
-{
- uint uindex;
+struct parsed_src_bracket {
+ int index;
- if (!parse_register_file_bracket( ctx, file ))
- return FALSE;
- eat_opt_white( &ctx->cur );
- if (!parse_uint( &ctx->cur, &uindex )) {
- report_error( ctx, "Expected literal unsigned integer" );
- return FALSE;
- }
- *index = (int) uindex;
- return TRUE;
-}
+ uint ind_file;
+ int ind_index;
+ uint ind_comp;
+};
-/* Parse destination register operand.
- * <register_dst> ::= <register_file_bracket_index> `]'
- */
-static boolean
-parse_register_dst(
- struct translate_ctx *ctx,
- uint *file,
- int *index )
-{
- if (!parse_register_file_bracket_index( ctx, file, index ))
- return FALSE;
- eat_opt_white( &ctx->cur );
- if (*ctx->cur != ']') {
- report_error( ctx, "Expected `]'" );
- return FALSE;
- }
- ctx->cur++;
- return TRUE;
-}
-/* Parse source register operand.
- * <register_src> ::= <register_file_bracket_index> `]' |
- * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' |
- * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' |
- * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]'
- */
static boolean
-parse_register_src(
+parse_register_src_bracket(
struct translate_ctx *ctx,
- uint *file,
- int *index,
- uint *ind_file,
- int *ind_index,
- uint *ind_comp)
+ struct parsed_src_bracket *brackets)
{
const char *cur;
uint uindex;
- *ind_comp = TGSI_SWIZZLE_X;
- if (!parse_register_file_bracket( ctx, file ))
- return FALSE;
+ memset(brackets, 0, sizeof(struct parsed_src_bracket));
+
eat_opt_white( &ctx->cur );
+
cur = ctx->cur;
- if (parse_file( &cur, ind_file )) {
- if (!parse_register_dst( ctx, ind_file, ind_index ))
+ if (parse_file( &cur, &brackets->ind_file )) {
+ if (!parse_register_dst( ctx, &brackets->ind_file,
+ &brackets->ind_index ))
return FALSE;
eat_opt_white( &ctx->cur );
@@ -420,16 +384,16 @@ parse_register_src(
switch (uprcase(*ctx->cur)) {
case 'X':
- *ind_comp = TGSI_SWIZZLE_X;
+ brackets->ind_comp = TGSI_SWIZZLE_X;
break;
case 'Y':
- *ind_comp = TGSI_SWIZZLE_Y;
+ brackets->ind_comp = TGSI_SWIZZLE_Y;
break;
case 'Z':
- *ind_comp = TGSI_SWIZZLE_Z;
+ brackets->ind_comp = TGSI_SWIZZLE_Z;
break;
case 'W':
- *ind_comp = TGSI_SWIZZLE_W;
+ brackets->ind_comp = TGSI_SWIZZLE_W;
break;
default:
report_error(ctx, "Expected indirect register swizzle component `x', `y', `z' or `w'");
@@ -450,12 +414,12 @@ parse_register_src(
return FALSE;
}
if (negate)
- *index = -(int) uindex;
+ brackets->index = -(int) uindex;
else
- *index = (int) uindex;
+ brackets->index = (int) uindex;
}
else {
- *index = 0;
+ brackets->index = 0;
}
}
else {
@@ -463,9 +427,9 @@ parse_register_src(
report_error( ctx, "Expected literal unsigned integer" );
return FALSE;
}
- *index = (int) uindex;
- *ind_file = TGSI_FILE_NULL;
- *ind_index = 0;
+ brackets->index = (int) uindex;
+ brackets->ind_file = TGSI_FILE_NULL;
+ brackets->ind_index = 0;
}
eat_opt_white( &ctx->cur );
if (*ctx->cur != ']') {
@@ -476,20 +440,123 @@ parse_register_src(
return TRUE;
}
-/* Parse register declaration.
- * <register_dcl> ::= <register_file_bracket_index> `]' |
- * <register_file_bracket_index> `..' <index> `]'
+static boolean
+parse_opt_register_src_bracket(
+ struct translate_ctx *ctx,
+ struct parsed_src_bracket *brackets,
+ int *parsed_brackets)
+{
+ const char *cur = ctx->cur;
+
+ *parsed_brackets = 0;
+
+ eat_opt_white( &cur );
+ if (cur[0] == '[') {
+ ++cur;
+ ctx->cur = cur;
+
+ if (!parse_register_src_bracket(ctx, brackets))
+ return FALSE;
+
+ *parsed_brackets = 1;
+ }
+
+ return TRUE;
+}
+
+/* <register_file_bracket> ::= <file> `['
*/
static boolean
-parse_register_dcl(
+parse_register_file_bracket(
+ struct translate_ctx *ctx,
+ uint *file )
+{
+ if (!parse_file( &ctx->cur, file )) {
+ report_error( ctx, "Unknown register file" );
+ return FALSE;
+ }
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != '[') {
+ report_error( ctx, "Expected `['" );
+ return FALSE;
+ }
+ ctx->cur++;
+ return TRUE;
+}
+
+/* <register_file_bracket_index> ::= <register_file_bracket> <uint>
+ */
+static boolean
+parse_register_file_bracket_index(
+ struct translate_ctx *ctx,
+ uint *file,
+ int *index )
+{
+ uint uindex;
+
+ if (!parse_register_file_bracket( ctx, file ))
+ return FALSE;
+ eat_opt_white( &ctx->cur );
+ if (!parse_uint( &ctx->cur, &uindex )) {
+ report_error( ctx, "Expected literal unsigned integer" );
+ return FALSE;
+ }
+ *index = (int) uindex;
+ return TRUE;
+}
+
+/* Parse source register operand.
+ * <register_src> ::= <register_file_bracket_index> `]' |
+ * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' |
+ * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' |
+ * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]'
+ */
+static boolean
+parse_register_src(
struct translate_ctx *ctx,
uint *file,
- int *first,
- int *last )
+ struct parsed_src_bracket *brackets)
+{
+
+ brackets->ind_comp = TGSI_SWIZZLE_X;
+ if (!parse_register_file_bracket( ctx, file ))
+ return FALSE;
+ if (!parse_register_src_bracket( ctx, brackets ))
+ return FALSE;
+
+ return TRUE;
+}
+
+struct parsed_dcl_bracket {
+ uint first;
+ uint last;
+};
+
+static boolean
+parse_register_dcl_bracket(
+ struct translate_ctx *ctx,
+ struct parsed_dcl_bracket *bracket)
{
- if (!parse_register_file_bracket_index( ctx, file, first ))
+ uint uindex;
+ memset(bracket, 0, sizeof(struct parsed_dcl_bracket));
+
+ eat_opt_white( &ctx->cur );
+
+ if (!parse_uint( &ctx->cur, &uindex )) {
+ /* it can be an empty bracket [] which means its range
+ * is from 0 to some implied size */
+ if (ctx->cur[0] == ']' && ctx->implied_array_size != 0) {
+ bracket->first = 0;
+ bracket->last = ctx->implied_array_size - 1;
+ goto cleanup;
+ }
+ report_error( ctx, "Expected literal unsigned integer" );
return FALSE;
+ }
+ bracket->first = (int) uindex;
+
eat_opt_white( &ctx->cur );
+
if (ctx->cur[0] == '.' && ctx->cur[1] == '.') {
uint uindex;
@@ -499,12 +566,14 @@ parse_register_dcl(
report_error( ctx, "Expected literal integer" );
return FALSE;
}
- *last = (int) uindex;
+ bracket->last = (int) uindex;
eat_opt_white( &ctx->cur );
}
else {
- *last = *first;
+ bracket->last = bracket->first;
}
+
+cleanup:
if (*ctx->cur != ']') {
report_error( ctx, "Expected `]' or `..'" );
return FALSE;
@@ -513,6 +582,70 @@ parse_register_dcl(
return TRUE;
}
+/* Parse register declaration.
+ * <register_dcl> ::= <register_file_bracket_index> `]' |
+ * <register_file_bracket_index> `..' <index> `]'
+ */
+static boolean
+parse_register_dcl(
+ struct translate_ctx *ctx,
+ uint *file,
+ struct parsed_dcl_bracket *brackets,
+ int *num_brackets)
+{
+ const char *cur;
+
+ *num_brackets = 0;
+
+ if (!parse_register_file_bracket( ctx, file ))
+ return FALSE;
+ if (!parse_register_dcl_bracket( ctx, &brackets[0] ))
+ return FALSE;
+
+ *num_brackets = 1;
+
+ cur = ctx->cur;
+ eat_opt_white( &cur );
+
+ if (cur[0] == '[') {
+ ++cur;
+ ctx->cur = cur;
+ if (!parse_register_dcl_bracket( ctx, &brackets[1] ))
+ return FALSE;
+ /* for geometry shader we don't really care about
+ * the first brackets it's always the size of the
+ * input primitive. so we want to declare just
+ * the index relevant to the semantics which is in
+ * the second bracket */
+ if (ctx->processor == TGSI_PROCESSOR_GEOMETRY) {
+ brackets[0] = brackets[1];
+ }
+ *num_brackets = 2;
+ }
+
+ return TRUE;
+}
+
+
+/* Parse destination register operand.
+ * <register_dst> ::= <register_file_bracket_index> `]'
+ */
+static boolean
+parse_register_dst(
+ struct translate_ctx *ctx,
+ uint *file,
+ int *index )
+{
+ if (!parse_register_file_bracket_index( ctx, file, index ))
+ return FALSE;
+ eat_opt_white( &ctx->cur );
+ if (*ctx->cur != ']') {
+ report_error( ctx, "Expected `]'" );
+ return FALSE;
+ }
+ ctx->cur++;
+ return TRUE;
+}
static boolean
parse_dst_operand(
@@ -582,37 +715,44 @@ parse_src_operand(
struct tgsi_full_src_register *src )
{
uint file;
- int index;
- uint ind_file;
- int ind_index;
- uint ind_comp;
uint swizzle[4];
boolean parsed_swizzle;
+ struct parsed_src_bracket bracket[2];
+ int parsed_opt_brackets;
if (*ctx->cur == '-') {
ctx->cur++;
eat_opt_white( &ctx->cur );
src->Register.Negate = 1;
}
-
+
if (*ctx->cur == '|') {
ctx->cur++;
eat_opt_white( &ctx->cur );
src->Register.Absolute = 1;
}
- if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp))
+ if (!parse_register_src(ctx, &file, &bracket[0]))
+ return FALSE;
+ if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets))
return FALSE;
+
src->Register.File = file;
- src->Register.Index = index;
- if (ind_file != TGSI_FILE_NULL) {
+ src->Register.Index = bracket[0].index;
+ if (bracket[0].ind_file != TGSI_FILE_NULL) {
src->Register.Indirect = 1;
- src->Indirect.File = ind_file;
- src->Indirect.Index = ind_index;
- src->Indirect.SwizzleX = ind_comp;
- src->Indirect.SwizzleY = ind_comp;
- src->Indirect.SwizzleZ = ind_comp;
- src->Indirect.SwizzleW = ind_comp;
+ src->Indirect.File = bracket[0].ind_file;
+ src->Indirect.Index = bracket[0].ind_index;
+ src->Indirect.SwizzleX = bracket[0].ind_comp;
+ src->Indirect.SwizzleY = bracket[0].ind_comp;
+ src->Indirect.SwizzleZ = bracket[0].ind_comp;
+ src->Indirect.SwizzleW = bracket[0].ind_comp;
+ }
+ if (parsed_opt_brackets) {
+ src->Register.Dimension = 1;
+ src->Dimension.Indirect = 0;
+ src->Dimension.Dimension = 0;
+ src->Dimension.Index = bracket[1].index;
}
/* Parse optional swizzle.
@@ -791,7 +931,9 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
"PSIZE",
"GENERIC",
"NORMAL",
- "FACE"
+ "FACE",
+ "EDGEFLAG",
+ "PRIM_ID"
};
static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
@@ -805,8 +947,8 @@ static boolean parse_declaration( struct translate_ctx *ctx )
{
struct tgsi_full_declaration decl;
uint file;
- int first;
- int last;
+ struct parsed_dcl_bracket brackets[2];
+ int num_brackets;
uint writemask;
const char *cur;
uint advance;
@@ -818,7 +960,7 @@ static boolean parse_declaration( struct translate_ctx *ctx )
report_error( ctx, "Syntax error" );
return FALSE;
}
- if (!parse_register_dcl( ctx, &file, &first, &last ))
+ if (!parse_register_dcl( ctx, &file, brackets, &num_brackets))
return FALSE;
if (!parse_opt_writemask( ctx, &writemask ))
return FALSE;
@@ -826,8 +968,8 @@ static boolean parse_declaration( struct translate_ctx *ctx )
decl = tgsi_default_full_declaration();
decl.Declaration.File = file;
decl.Declaration.UsageMask = writemask;
- decl.Range.First = first;
- decl.Range.Last = last;
+ decl.Range.First = brackets[0].first;
+ decl.Range.Last = brackets[0].last;
cur = ctx->cur;
eat_opt_white( &cur );
@@ -1027,7 +1169,7 @@ static boolean parse_property( struct translate_ctx *ctx )
}
for (property_name = 0; property_name < TGSI_PROPERTY_COUNT;
++property_name) {
- if (streq_nocase_uprcase(id, property_names[property_name])) {
+ if (streq_nocase_uprcase(property_names[property_name], id)) {
break;
}
}
@@ -1044,6 +1186,10 @@ static boolean parse_property( struct translate_ctx *ctx )
report_error( ctx, "Unknown primitive name as property!" );
return FALSE;
}
+ if (property_name == TGSI_PROPERTY_GS_INPUT_PRIM &&
+ ctx->processor == TGSI_PROCESSOR_GEOMETRY) {
+ ctx->implied_array_size = u_vertices_per_prim(values[0]);
+ }
break;
default:
if (!parse_uint(&ctx->cur, &values[0] )) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 2713372b05..2b51672b8e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -109,8 +109,13 @@ struct ureg_program
unsigned nr_outputs;
struct {
- float v[4];
+ union {
+ float f[4];
+ unsigned u[4];
+ int i[4];
+ } value;
unsigned nr;
+ unsigned type;
} immediate[UREG_MAX_IMMEDIATE];
unsigned nr_immediates;
@@ -513,22 +518,22 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
}
-
-
-static int match_or_expand_immediate( const float *v,
- unsigned nr,
- float *v2,
- unsigned *nr2,
- unsigned *swizzle )
+static int
+match_or_expand_immediate( const unsigned *v,
+ unsigned nr,
+ unsigned *v2,
+ unsigned *pnr2,
+ unsigned *swizzle )
{
+ unsigned nr2 = *pnr2;
unsigned i, j;
-
+
*swizzle = 0;
for (i = 0; i < nr; i++) {
boolean found = FALSE;
- for (j = 0; j < *nr2 && !found; j++) {
+ for (j = 0; j < nr2 && !found; j++) {
if (v[i] == v2[j]) {
*swizzle |= j << (i * 2);
found = TRUE;
@@ -536,24 +541,28 @@ static int match_or_expand_immediate( const float *v,
}
if (!found) {
- if (*nr2 >= 4)
+ if (nr2 >= 4) {
return FALSE;
+ }
- v2[*nr2] = v[i];
- *swizzle |= *nr2 << (i * 2);
- (*nr2)++;
+ v2[nr2] = v[i];
+ *swizzle |= nr2 << (i * 2);
+ nr2++;
}
}
+ /* Actually expand immediate only when fully succeeded.
+ */
+ *pnr2 = nr2;
return TRUE;
}
-
-
-struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg,
- const float *v,
- unsigned nr )
+static struct ureg_src
+decl_immediate( struct ureg_program *ureg,
+ const unsigned *v,
+ unsigned nr,
+ unsigned type )
{
unsigned i, j;
unsigned swizzle;
@@ -563,38 +572,82 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg,
*/
for (i = 0; i < ureg->nr_immediates; i++) {
- if (match_or_expand_immediate( v,
- nr,
- ureg->immediate[i].v,
- &ureg->immediate[i].nr,
- &swizzle ))
+ if (ureg->immediate[i].type != type) {
+ continue;
+ }
+ if (match_or_expand_immediate(v,
+ nr,
+ ureg->immediate[i].value.u,
+ &ureg->immediate[i].nr,
+ &swizzle)) {
goto out;
+ }
}
if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
i = ureg->nr_immediates++;
- if (match_or_expand_immediate( v,
- nr,
- ureg->immediate[i].v,
- &ureg->immediate[i].nr,
- &swizzle ))
+ ureg->immediate[i].type = type;
+ if (match_or_expand_immediate(v,
+ nr,
+ ureg->immediate[i].value.u,
+ &ureg->immediate[i].nr,
+ &swizzle)) {
goto out;
+ }
}
- set_bad( ureg );
+ set_bad(ureg);
out:
/* Make sure that all referenced elements are from this immediate.
* Has the effect of making size-one immediates into scalars.
*/
- for (j = nr; j < 4; j++)
+ for (j = nr; j < 4; j++) {
swizzle |= (swizzle & 0x3) << (j * 2);
+ }
+
+ return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
+ (swizzle >> 0) & 0x3,
+ (swizzle >> 2) & 0x3,
+ (swizzle >> 4) & 0x3,
+ (swizzle >> 6) & 0x3);
+}
+
+
+struct ureg_src
+ureg_DECL_immediate( struct ureg_program *ureg,
+ const float *v,
+ unsigned nr )
+{
+ union {
+ float f[4];
+ unsigned u[4];
+ } fu;
+ unsigned int i;
+
+ for (i = 0; i < nr; i++) {
+ fu.f[i] = v[i];
+ }
+
+ return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
+}
+
- return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ),
- (swizzle >> 0) & 0x3,
- (swizzle >> 2) & 0x3,
- (swizzle >> 4) & 0x3,
- (swizzle >> 6) & 0x3);
+struct ureg_src
+ureg_DECL_immediate_uint( struct ureg_program *ureg,
+ const unsigned *v,
+ unsigned nr )
+{
+ return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
+}
+
+
+struct ureg_src
+ureg_DECL_immediate_int( struct ureg_program *ureg,
+ const int *v,
+ unsigned nr )
+{
+ return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
}
@@ -818,8 +871,8 @@ ureg_insn(struct ureg_program *ureg,
unsigned i;
boolean saturate;
boolean predicate;
- boolean negate;
- unsigned swizzle[4];
+ boolean negate = FALSE;
+ unsigned swizzle[4] = { 0 };
saturate = nr_dst ? dst[0].Saturate : FALSE;
predicate = nr_dst ? dst[0].Predicate : FALSE;
@@ -865,8 +918,8 @@ ureg_tex_insn(struct ureg_program *ureg,
unsigned i;
boolean saturate;
boolean predicate;
- boolean negate;
- unsigned swizzle[4];
+ boolean negate = FALSE;
+ unsigned swizzle[4] = { 0 };
saturate = nr_dst ? dst[0].Saturate : FALSE;
predicate = nr_dst ? dst[0].Predicate : FALSE;
@@ -982,21 +1035,23 @@ static void emit_decl_range( struct ureg_program *ureg,
out[1].decl_range.Last = first + count - 1;
}
-static void emit_immediate( struct ureg_program *ureg,
- const float *v )
+static void
+emit_immediate( struct ureg_program *ureg,
+ const unsigned *v,
+ unsigned type )
{
union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
out[0].value = 0;
out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
out[0].imm.NrTokens = 5;
- out[0].imm.DataType = TGSI_IMM_FLOAT32;
+ out[0].imm.DataType = type;
out[0].imm.Padding = 0;
- out[1].imm_data.Float = v[0];
- out[2].imm_data.Float = v[1];
- out[3].imm_data.Float = v[2];
- out[4].imm_data.Float = v[3];
+ out[1].imm_data.Uint = v[0];
+ out[2].imm_data.Uint = v[1];
+ out[3].imm_data.Uint = v[2];
+ out[4].imm_data.Uint = v[3];
}
@@ -1091,7 +1146,8 @@ static void emit_decls( struct ureg_program *ureg )
for (i = 0; i < ureg->nr_immediates; i++) {
emit_immediate( ureg,
- ureg->immediate[i].v );
+ ureg->immediate[i].value.u,
+ ureg->immediate[i].type );
}
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 179862d4e2..38e2fd8d0a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -154,6 +154,16 @@ ureg_DECL_immediate( struct ureg_program *,
unsigned nr );
struct ureg_src
+ureg_DECL_immediate_uint( struct ureg_program *,
+ const unsigned *v,
+ unsigned nr );
+
+struct ureg_src
+ureg_DECL_immediate_int( struct ureg_program *,
+ const int *v,
+ unsigned nr );
+
+struct ureg_src
ureg_DECL_constant( struct ureg_program *,
unsigned index );
@@ -227,6 +237,90 @@ ureg_imm1f( struct ureg_program *ureg,
return ureg_DECL_immediate( ureg, v, 1 );
}
+static INLINE struct ureg_src
+ureg_imm4u( struct ureg_program *ureg,
+ unsigned a, unsigned b,
+ unsigned c, unsigned d)
+{
+ unsigned v[4];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ v[3] = d;
+ return ureg_DECL_immediate_uint( ureg, v, 4 );
+}
+
+static INLINE struct ureg_src
+ureg_imm3u( struct ureg_program *ureg,
+ unsigned a, unsigned b,
+ unsigned c)
+{
+ unsigned v[3];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ return ureg_DECL_immediate_uint( ureg, v, 3 );
+}
+
+static INLINE struct ureg_src
+ureg_imm2u( struct ureg_program *ureg,
+ unsigned a, unsigned b)
+{
+ unsigned v[2];
+ v[0] = a;
+ v[1] = b;
+ return ureg_DECL_immediate_uint( ureg, v, 2 );
+}
+
+static INLINE struct ureg_src
+ureg_imm1u( struct ureg_program *ureg,
+ unsigned a)
+{
+ return ureg_DECL_immediate_uint( ureg, &a, 1 );
+}
+
+static INLINE struct ureg_src
+ureg_imm4i( struct ureg_program *ureg,
+ int a, int b,
+ int c, int d)
+{
+ int v[4];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ v[3] = d;
+ return ureg_DECL_immediate_int( ureg, v, 4 );
+}
+
+static INLINE struct ureg_src
+ureg_imm3i( struct ureg_program *ureg,
+ int a, int b,
+ int c)
+{
+ int v[3];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ return ureg_DECL_immediate_int( ureg, v, 3 );
+}
+
+static INLINE struct ureg_src
+ureg_imm2i( struct ureg_program *ureg,
+ int a, int b)
+{
+ int v[2];
+ v[0] = a;
+ v[1] = b;
+ return ureg_DECL_immediate_int( ureg, v, 2 );
+}
+
+static INLINE struct ureg_src
+ureg_imm1i( struct ureg_program *ureg,
+ int a)
+{
+ return ureg_DECL_immediate_int( ureg, &a, 1 );
+}
+
/***********************************************************************
* Functions for patching up labels
*/