summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c30
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c18
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c34
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c17
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c39
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c11
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c18
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h4
-rw-r--r--src/gallium/docs/source/conf.py4
-rw-r--r--src/gallium/docs/source/exts/tgsi.py17
-rw-r--r--src/gallium/docs/source/screen.rst4
-rw-r--r--src/gallium/docs/source/tgsi.rst512
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c2
-rw-r--r--src/gallium/drivers/r300/r300_fs.c10
-rw-r--r--src/gallium/drivers/r300/r300_texture.h13
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c63
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.h7
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c3
-rw-r--r--src/gallium/drivers/svga/svga_context.c1
-rw-r--r--src/gallium/drivers/svga/svga_context.h4
-rw-r--r--src/gallium/drivers/svga/svga_state_fs.c126
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.h1
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_decl_sm30.c15
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_emit.h19
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c271
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.c20
-rw-r--r--src/gallium/state_trackers/vega/asm_fill.h551
-rw-r--r--src/gallium/state_trackers/vega/shaders_cache.c149
30 files changed, 1263 insertions, 708 deletions
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index d3084fd428..d5ddc4a6a9 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -352,7 +352,10 @@ draw_find_shader_output(const struct draw_context *draw,
/**
- * Return number of the shader outputs.
+ * Return total number of the shader outputs. This function is similar to
+ * draw_current_shader_outputs() but this function also counts any extra
+ * vertex/geometry output attributes that may be filled in by some draw
+ * stages (such as AA point, AA line).
*
* If geometry shader is present, its output will be returned,
* if not vertex shader is used.
@@ -362,8 +365,9 @@ draw_num_shader_outputs(const struct draw_context *draw)
{
uint count = draw->vs.vertex_shader->info.num_outputs;
- /* if geometry shader is present, its outputs go to te
- * driver, not the vertex shaders */
+ /* If a geometry shader is present, its outputs go to the
+ * driver, else the vertex shader's outputs.
+ */
if (draw->gs.geometry_shader)
count = draw->gs.geometry_shader->info.num_outputs;
@@ -374,7 +378,8 @@ draw_num_shader_outputs(const struct draw_context *draw)
/**
- * Provide TGSI sampler objects for vertex/geometry shaders that use texture fetches.
+ * Provide TGSI sampler objects for vertex/geometry shaders that use
+ * texture fetches.
* This might only be used by software drivers for the time being.
*/
void
@@ -454,14 +459,27 @@ void draw_do_flush( struct draw_context *draw, unsigned flags )
}
-int draw_current_shader_outputs(struct draw_context *draw)
+/**
+ * Return the number of output attributes produced by the geometry
+ * shader, if present. If no geometry shader, return the number of
+ * outputs from the vertex shader.
+ * \sa draw_num_shader_outputs
+ */
+uint
+draw_current_shader_outputs(const struct draw_context *draw)
{
if (draw->gs.geometry_shader)
return draw->gs.num_gs_outputs;
return draw->vs.num_vs_outputs;
}
-int draw_current_shader_position_output(struct draw_context *draw)
+
+/**
+ * Return the index of the shader output which will contain the
+ * vertex position.
+ */
+uint
+draw_current_shader_position_output(const struct draw_context *draw)
{
if (draw->gs.geometry_shader)
return draw->gs.position_output;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index e58129b99d..8f6ca15dfa 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -49,6 +49,10 @@
#include "draw_pipe.h"
+/** Approx number of new tokens for instructions in aa_transform_inst() */
+#define NUM_NEW_TOKENS 50
+
+
/**
* Max texture level for the alpha texture used for antialiasing
*/
@@ -179,12 +183,7 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
static int
free_bit(uint bitfield)
{
- int i;
- for (i = 0; i < 32; i++) {
- if ((bitfield & (1 << i)) == 0)
- return i;
- }
- return -1;
+ return ffs(~bitfield) - 1;
}
@@ -343,11 +342,10 @@ generate_aaline_fs(struct aaline_stage *aaline)
const struct pipe_shader_state *orig_fs = &aaline->fs->state;
struct pipe_shader_state aaline_fs;
struct aa_transform_context transform;
-
-#define MAX 1000
+ const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
aaline_fs = *orig_fs; /* copy to init */
- aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ aaline_fs.tokens = tgsi_alloc_tokens(newLen);
if (aaline_fs.tokens == NULL)
return FALSE;
@@ -363,7 +361,7 @@ generate_aaline_fs(struct aaline_stage *aaline)
tgsi_transform_shader(orig_fs->tokens,
(struct tgsi_token *) aaline_fs.tokens,
- MAX, &transform.base);
+ newLen, &transform.base);
#if 0 /* DEBUG */
tgsi_dump(orig_fs->tokens, 0);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index d86717e518..97f3480879 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -53,6 +53,10 @@
#include "draw_pipe.h"
+/** Approx number of new tokens for instructions in aa_transform_inst() */
+#define NUM_NEW_TOKENS 200
+
+
/*
* Enabling NORMALIZE might give _slightly_ better results.
* Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
@@ -81,16 +85,19 @@ struct aapoint_stage
{
struct draw_stage stage;
- int psize_slot;
+ /** half of pipe_rasterizer_state::point_size */
float radius;
+ /** vertex attrib slot containing point size */
+ int psize_slot;
+
/** this is the vertex attrib slot for the new texcoords */
uint tex_slot;
+
+ /** vertex attrib slot containing position */
uint pos_slot;
- /*
- * Currently bound state
- */
+ /** Currently bound fragment shader */
struct aapoint_fragment_shader *fs;
/*
@@ -491,11 +498,10 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
struct pipe_shader_state aapoint_fs;
struct aa_transform_context transform;
-
-#define MAX 1000
+ const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
aapoint_fs = *orig_fs; /* copy to init */
- aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ aapoint_fs.tokens = tgsi_alloc_tokens(newLen);
if (aapoint_fs.tokens == NULL)
return FALSE;
@@ -511,7 +517,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
tgsi_transform_shader(orig_fs->tokens,
(struct tgsi_token *) aapoint_fs.tokens,
- MAX, &transform.base);
+ newLen, &transform.base);
#if 0 /* DEBUG */
printf("draw_aapoint, orig shader:\n");
@@ -575,8 +581,8 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header)
const struct aapoint_stage *aapoint = aapoint_stage(stage);
struct prim_header tri;
struct vertex_header *v[4];
- uint texPos = aapoint->tex_slot;
- uint pos_slot = aapoint->pos_slot;
+ const uint tex_slot = aapoint->tex_slot;
+ const uint pos_slot = aapoint->pos_slot;
float radius, *pos, *tex;
uint i;
float k;
@@ -643,16 +649,16 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header)
pos[1] += radius;
/* new texcoords */
- tex = v[0]->data[texPos];
+ tex = v[0]->data[tex_slot];
ASSIGN_4V(tex, -1, -1, k, 1);
- tex = v[1]->data[texPos];
+ tex = v[1]->data[tex_slot];
ASSIGN_4V(tex, 1, -1, k, 1);
- tex = v[2]->data[texPos];
+ tex = v[2]->data[tex_slot];
ASSIGN_4V(tex, 1, 1, k, 1);
- tex = v[3]->data[texPos];
+ tex = v[3]->data[tex_slot];
ASSIGN_4V(tex, -1, 1, k, 1);
/* emit 2 tris for the quad strip */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index f047d8359c..d0d99aa331 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -50,6 +50,9 @@
#include "draw_pipe.h"
+/** Approx number of new tokens for instructions in pstip_transform_inst() */
+#define NUM_NEW_TOKENS 50
+
/**
* Subclass of pipe_shader_state to carry extra fragment shader info.
@@ -172,12 +175,7 @@ pstip_transform_immed(struct tgsi_transform_context *ctx,
static int
free_bit(uint bitfield)
{
- int i;
- for (i = 0; i < 32; i++) {
- if ((bitfield & (1 << i)) == 0)
- return i;
- }
- return -1;
+ return ffs(~bitfield) - 1;
}
@@ -333,11 +331,10 @@ generate_pstip_fs(struct pstip_stage *pstip)
/*struct draw_context *draw = pstip->stage.draw;*/
struct pipe_shader_state pstip_fs;
struct pstip_transform_context transform;
-
-#define MAX 1000
+ const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
pstip_fs = *orig_fs; /* copy to init */
- pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ pstip_fs.tokens = tgsi_alloc_tokens(newLen);
if (pstip_fs.tokens == NULL)
return FALSE;
@@ -352,7 +349,7 @@ generate_pstip_fs(struct pstip_stage *pstip)
tgsi_transform_shader(orig_fs->tokens,
(struct tgsi_token *) pstip_fs.tokens,
- MAX, &transform.base);
+ newLen, &transform.base);
#if 0 /* DEBUG */
tgsi_dump(orig_fs->tokens, 0);
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 6a7190e975..69466d8749 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -280,8 +280,8 @@ void draw_gs_destroy( struct draw_context *draw );
/*******************************************************************************
* Common shading code:
*/
-int draw_current_shader_outputs(struct draw_context *draw);
-int draw_current_shader_position_output(struct draw_context *draw);
+uint draw_current_shader_outputs(const struct draw_context *draw);
+uint draw_current_shader_position_output(const struct draw_context *draw);
/*******************************************************************************
* Vertex processing (was passthrough) code:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index fbb9aa0e63..f7a1bb74a9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -264,6 +264,12 @@ static void
micro_rcp(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
+#if 0 /* for debugging */
+ assert(src->f[0] != 0.0f);
+ assert(src->f[1] != 0.0f);
+ assert(src->f[2] != 0.0f);
+ assert(src->f[3] != 0.0f);
+#endif
dst->f[0] = 1.0f / src->f[0];
dst->f[1] = 1.0f / src->f[1];
dst->f[2] = 1.0f / src->f[2];
@@ -284,6 +290,12 @@ static void
micro_rsq(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
+#if 0 /* for debugging */
+ assert(src->f[0] != 0.0f);
+ assert(src->f[1] != 0.0f);
+ assert(src->f[2] != 0.0f);
+ assert(src->f[3] != 0.0f);
+#endif
dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
@@ -450,12 +462,20 @@ static const union tgsi_exec_channel ZeroVec =
{ { 0.0, 0.0, 0.0, 0.0 } };
-#define CHECK_INF_OR_NAN(chan) do {\
- assert(!util_is_inf_or_nan((chan)->f[0]));\
- assert(!util_is_inf_or_nan((chan)->f[1]));\
- assert(!util_is_inf_or_nan((chan)->f[2]));\
- assert(!util_is_inf_or_nan((chan)->f[3]));\
- } while (0)
+/**
+ * Assert that none of the float values in 'chan' are infinite or NaN.
+ * NaN and Inf may occur normally during program execution and should
+ * not lead to crashes, etc. But when debugging, it's helpful to catch
+ * them.
+ */
+static INLINE void
+check_inf_or_nan(const union tgsi_exec_channel *chan)
+{
+ assert(!util_is_inf_or_nan((chan)->f[0]));
+ assert(!util_is_inf_or_nan((chan)->f[1]));
+ assert(!util_is_inf_or_nan((chan)->f[2]));
+ assert(!util_is_inf_or_nan((chan)->f[3]));
+}
#ifdef DEBUG
@@ -1219,8 +1239,9 @@ store_dest(struct tgsi_exec_machine *mach,
int offset = 0; /* indirection offset */
int index;
- if (dst_datatype == TGSI_EXEC_DATA_FLOAT) {
- CHECK_INF_OR_NAN(chan);
+ /* for debugging */
+ if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
+ check_inf_or_nan(chan);
}
/* There is an extra source register that indirectly subscripts
@@ -1478,7 +1499,7 @@ emit_primitive(struct tgsi_exec_machine *mach)
}
/*
- * Fetch a four texture samples using STR texture coordinates.
+ * Fetch four texture samples using STR texture coordinates.
*/
static void
fetch_texel( struct tgsi_sampler *sampler,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index fd37fc3079..7e19e1fe36 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -284,3 +284,14 @@ tgsi_dup_tokens(const struct tgsi_token *tokens)
memcpy(new_tokens, tokens, bytes);
return new_tokens;
}
+
+
+/**
+ * Allocate memory for num_tokens tokens.
+ */
+struct tgsi_token *
+tgsi_alloc_tokens(unsigned num_tokens)
+{
+ unsigned bytes = num_tokens * sizeof(struct tgsi_token);
+ return (struct tgsi_token *) MALLOC(bytes);
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 8150e3cd29..b45ccee2f6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -130,6 +130,10 @@ tgsi_num_tokens(const struct tgsi_token *tokens);
struct tgsi_token *
tgsi_dup_tokens(const struct tgsi_token *tokens);
+struct tgsi_token *
+tgsi_alloc_tokens(unsigned num_tokens);
+
+
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 60a1cb1af4..27960bac22 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -104,6 +104,8 @@ struct ureg_program
struct {
unsigned index;
+ unsigned semantic_name;
+ unsigned semantic_index;
} gs_input[UREG_MAX_INPUT];
unsigned nr_gs_inputs;
@@ -326,10 +328,14 @@ ureg_DECL_vs_input( struct ureg_program *ureg,
struct ureg_src
ureg_DECL_gs_input(struct ureg_program *ureg,
- unsigned index)
+ unsigned index,
+ unsigned semantic_name,
+ unsigned semantic_index)
{
if (ureg->nr_gs_inputs < UREG_MAX_INPUT) {
ureg->gs_input[ureg->nr_gs_inputs].index = index;
+ ureg->gs_input[ureg->nr_gs_inputs].semantic_name = semantic_name;
+ ureg->gs_input[ureg->nr_gs_inputs].semantic_index = semantic_index;
ureg->nr_gs_inputs++;
} else {
set_bad(ureg);
@@ -1252,10 +1258,12 @@ static void emit_decls( struct ureg_program *ureg )
}
} else {
for (i = 0; i < ureg->nr_gs_inputs; i++) {
- emit_decl_range(ureg,
- TGSI_FILE_INPUT,
- ureg->gs_input[i].index,
- 1);
+ emit_decl(ureg,
+ TGSI_FILE_INPUT,
+ ureg->gs_input[i].index,
+ ureg->gs_input[i].semantic_name,
+ ureg->gs_input[i].semantic_index,
+ TGSI_INTERPOLATE_CONSTANT);
}
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 6198ca3464..6be66d0694 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -161,7 +161,9 @@ ureg_DECL_vs_input( struct ureg_program *,
struct ureg_src
ureg_DECL_gs_input(struct ureg_program *,
- unsigned index);
+ unsigned index,
+ unsigned semantic_name,
+ unsigned semantic_index);
struct ureg_src
ureg_DECL_system_value(struct ureg_program *,
diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py
index 9b0c86babd..59c19ed98d 100644
--- a/src/gallium/docs/source/conf.py
+++ b/src/gallium/docs/source/conf.py
@@ -16,13 +16,13 @@ import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.append(os.path.abspath('.'))
+sys.path.append(os.path.abspath('exts'))
# -- General configuration -----------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.pngmath']
+extensions = ['sphinx.ext.pngmath', 'tgsi']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
diff --git a/src/gallium/docs/source/exts/tgsi.py b/src/gallium/docs/source/exts/tgsi.py
new file mode 100644
index 0000000000..e92cd5c4d1
--- /dev/null
+++ b/src/gallium/docs/source/exts/tgsi.py
@@ -0,0 +1,17 @@
+# tgsi.py
+# Sphinx extension providing formatting for TGSI opcodes
+# (c) Corbin Simpson 2010
+
+import docutils.nodes
+import sphinx.addnodes
+
+def parse_opcode(env, sig, signode):
+ opcode, desc = sig.split("-", 1)
+ opcode = opcode.strip().upper()
+ desc = " (%s)" % desc.strip()
+ signode += sphinx.addnodes.desc_name(opcode, opcode)
+ signode += sphinx.addnodes.desc_annotation(desc, desc)
+ return opcode
+
+def setup(app):
+ app.add_description_unit("opcode", "opcode", "%s (TGSI opcode)", parse_opcode)
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 3e57a282fd..55a4c6990d 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -227,7 +227,7 @@ buffer_map
Map a buffer into memory.
-**usage** is a bitmask of :ref:`PIPE_TEXTURE_USAGE` flags.
+**usage** is a bitmask of :ref:`PIPE_BUFFER_USAGE` flags.
Returns a pointer to the map, or NULL if the mapping failed.
@@ -248,7 +248,7 @@ Flush a range of mapped memory into a buffer.
The buffer must have been mapped with ``PIPE_BUFFER_USAGE_FLUSH_EXPLICIT``.
-**usage** is a bitmask of :ref:`PIPE_TEXTURE_USAGE` flags.
+**usage** is a bitmask of :ref:`PIPE_BUFFER_USAGE` flags.
buffer_unmap
^^^^^^^^^^^^
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 3e702ceeda..5478d86667 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -6,6 +6,23 @@ for describing shaders. Since Gallium is inherently shaderful, shaders are
an important part of the API. TGSI is the only intermediate representation
used by all drivers.
+Basics
+------
+
+All TGSI instructions, known as *opcodes*, operate on arbitrary-precision
+floating-point four-component vectors. An opcode may have up to one
+destination register, known as *dst*, and between zero and three source
+registers, called *src0* through *src2*, or simply *src* if there is only
+one.
+
+Some instructions, like :opcode:`I2F`, permit re-interpretation of vector
+components as integers. Other instructions permit using registers as
+two-component vectors with double precision; see :ref:`Double Opcodes`.
+
+When an instruction has a scalar result, the result is usually copied into
+each of the components of *dst*. When this happens, the result is said to be
+*replicated* to *dst*. :opcode:`RCP` is one such instruction.
+
Instruction Set
---------------
@@ -13,7 +30,7 @@ From GL_NV_vertex_program
^^^^^^^^^^^^^^^^^^^^^^^^^
-ARL - Address Register Load
+.. opcode:: ARL - Address Register Load
.. math::
@@ -26,7 +43,7 @@ ARL - Address Register Load
dst.w = \lfloor src.w\rfloor
-MOV - Move
+.. opcode:: MOV - Move
.. math::
@@ -39,7 +56,7 @@ MOV - Move
dst.w = src.w
-LIT - Light Coefficients
+.. opcode:: LIT - Light Coefficients
.. math::
@@ -52,33 +69,25 @@ LIT - Light Coefficients
dst.w = 1
-RCP - Reciprocal
-
-.. math::
+.. opcode:: RCP - Reciprocal
- dst.x = \frac{1}{src.x}
+This instruction replicates its result.
- dst.y = \frac{1}{src.x}
+.. math::
- dst.z = \frac{1}{src.x}
+ dst = \frac{1}{src.x}
- dst.w = \frac{1}{src.x}
+.. opcode:: RSQ - Reciprocal Square Root
-RSQ - Reciprocal Square Root
+This instruction replicates its result.
.. math::
- dst.x = \frac{1}{\sqrt{|src.x|}}
-
- dst.y = \frac{1}{\sqrt{|src.x|}}
-
- dst.z = \frac{1}{\sqrt{|src.x|}}
+ dst = \frac{1}{\sqrt{|src.x|}}
- dst.w = \frac{1}{\sqrt{|src.x|}}
-
-EXP - Approximate Exponential Base 2
+.. opcode:: EXP - Approximate Exponential Base 2
.. math::
@@ -91,7 +100,7 @@ EXP - Approximate Exponential Base 2
dst.w = 1
-LOG - Approximate Logarithm Base 2
+.. opcode:: LOG - Approximate Logarithm Base 2
.. math::
@@ -104,7 +113,7 @@ LOG - Approximate Logarithm Base 2
dst.w = 1
-MUL - Multiply
+.. opcode:: MUL - Multiply
.. math::
@@ -117,7 +126,7 @@ MUL - Multiply
dst.w = src0.w \times src1.w
-ADD - Add
+.. opcode:: ADD - Add
.. math::
@@ -130,33 +139,25 @@ ADD - Add
dst.w = src0.w + src1.w
-DP3 - 3-component Dot Product
-
-.. math::
+.. opcode:: DP3 - 3-component Dot Product
- dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+This instruction replicates its result.
- dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+.. math::
- dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+ dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
- dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
+.. opcode:: DP4 - 4-component Dot Product
-DP4 - 4-component Dot Product
+This instruction replicates its result.
.. math::
- dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
-
- dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
+ dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
- dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
- dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
-
-
-DST - Distance Vector
+.. opcode:: DST - Distance Vector
.. math::
@@ -169,7 +170,7 @@ DST - Distance Vector
dst.w = src1.w
-MIN - Minimum
+.. opcode:: MIN - Minimum
.. math::
@@ -182,7 +183,7 @@ MIN - Minimum
dst.w = min(src0.w, src1.w)
-MAX - Maximum
+.. opcode:: MAX - Maximum
.. math::
@@ -195,7 +196,7 @@ MAX - Maximum
dst.w = max(src0.w, src1.w)
-SLT - Set On Less Than
+.. opcode:: SLT - Set On Less Than
.. math::
@@ -208,7 +209,7 @@ SLT - Set On Less Than
dst.w = (src0.w < src1.w) ? 1 : 0
-SGE - Set On Greater Equal Than
+.. opcode:: SGE - Set On Greater Equal Than
.. math::
@@ -221,7 +222,7 @@ SGE - Set On Greater Equal Than
dst.w = (src0.w >= src1.w) ? 1 : 0
-MAD - Multiply And Add
+.. opcode:: MAD - Multiply And Add
.. math::
@@ -234,7 +235,7 @@ MAD - Multiply And Add
dst.w = src0.w \times src1.w + src2.w
-SUB - Subtract
+.. opcode:: SUB - Subtract
.. math::
@@ -247,7 +248,7 @@ SUB - Subtract
dst.w = src0.w - src1.w
-LRP - Linear Interpolate
+.. opcode:: LRP - Linear Interpolate
.. math::
@@ -260,7 +261,7 @@ LRP - Linear Interpolate
dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
-CND - Condition
+.. opcode:: CND - Condition
.. math::
@@ -273,7 +274,7 @@ CND - Condition
dst.w = (src2.w > 0.5) ? src0.w : src1.w
-DP2A - 2-component Dot Product And Add
+.. opcode:: DP2A - 2-component Dot Product And Add
.. math::
@@ -286,7 +287,7 @@ DP2A - 2-component Dot Product And Add
dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
-FRAC - Fraction
+.. opcode:: FRAC - Fraction
.. math::
@@ -299,7 +300,7 @@ FRAC - Fraction
dst.w = src.w - \lfloor src.w\rfloor
-CLAMP - Clamp
+.. opcode:: CLAMP - Clamp
.. math::
@@ -312,9 +313,9 @@ CLAMP - Clamp
dst.w = clamp(src0.w, src1.w, src2.w)
-FLR - Floor
+.. opcode:: FLR - Floor
-This is identical to ARL.
+This is identical to :opcode:`ARL`.
.. math::
@@ -327,7 +328,7 @@ This is identical to ARL.
dst.w = \lfloor src.w\rfloor
-ROUND - Round
+.. opcode:: ROUND - Round
.. math::
@@ -340,45 +341,33 @@ ROUND - Round
dst.w = round(src.w)
-EX2 - Exponential Base 2
+.. opcode:: EX2 - Exponential Base 2
-.. math::
+This instruction replicates its result.
- dst.x = 2^{src.x}
-
- dst.y = 2^{src.x}
+.. math::
- dst.z = 2^{src.x}
+ dst = 2^{src.x}
- dst.w = 2^{src.x}
+.. opcode:: LG2 - Logarithm Base 2
-LG2 - Logarithm Base 2
+This instruction replicates its result.
.. math::
- dst.x = \log_2{src.x}
-
- dst.y = \log_2{src.x}
-
- dst.z = \log_2{src.x}
+ dst = \log_2{src.x}
- dst.w = \log_2{src.x}
+.. opcode:: POW - Power
-POW - Power
+This instruction replicates its result.
.. math::
- dst.x = src0.x^{src1.x}
+ dst = src0.x^{src1.x}
- dst.y = src0.x^{src1.x}
-
- dst.z = src0.x^{src1.x}
-
- dst.w = src0.x^{src1.x}
-
-XPD - Cross Product
+.. opcode:: XPD - Cross Product
.. math::
@@ -391,7 +380,7 @@ XPD - Cross Product
dst.w = 1
-ABS - Absolute
+.. opcode:: ABS - Absolute
.. math::
@@ -404,48 +393,36 @@ ABS - Absolute
dst.w = |src.w|
-RCC - Reciprocal Clamped
+.. opcode:: RCC - Reciprocal Clamped
+
+This instruction replicates its result.
XXX cleanup on aisle three
.. math::
- dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
-
- dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
-
- dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
+ dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
- dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
+.. opcode:: DPH - Homogeneous Dot Product
-DPH - Homogeneous Dot Product
+This instruction replicates its result.
.. math::
- dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
+ dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
- dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
- dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
+.. opcode:: COS - Cosine
- dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
-
-
-COS - Cosine
+This instruction replicates its result.
.. math::
- dst.x = \cos{src.x}
-
- dst.y = \cos{src.x}
-
- dst.z = \cos{src.x}
+ dst = \cos{src.x}
- dst.w = \cos{src.x}
-
-DDX - Derivative Relative To X
+.. opcode:: DDX - Derivative Relative To X
.. math::
@@ -458,7 +435,7 @@ DDX - Derivative Relative To X
dst.w = partialx(src.w)
-DDY - Derivative Relative To Y
+.. opcode:: DDY - Derivative Relative To Y
.. math::
@@ -471,32 +448,32 @@ DDY - Derivative Relative To Y
dst.w = partialy(src.w)
-KILP - Predicated Discard
+.. opcode:: KILP - Predicated Discard
discard
-PK2H - Pack Two 16-bit Floats
+.. opcode:: PK2H - Pack Two 16-bit Floats
TBD
-PK2US - Pack Two Unsigned 16-bit Scalars
+.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
TBD
-PK4B - Pack Four Signed 8-bit Scalars
+.. opcode:: PK4B - Pack Four Signed 8-bit Scalars
TBD
-PK4UB - Pack Four Unsigned 8-bit Scalars
+.. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
TBD
-RFL - Reflection Vector
+.. opcode:: RFL - Reflection Vector
.. math::
@@ -508,10 +485,12 @@ RFL - Reflection Vector
dst.w = 1
-Considered for removal.
+.. note::
+
+ Considered for removal.
-SEQ - Set On Equal
+.. opcode:: SEQ - Set On Equal
.. math::
@@ -524,21 +503,20 @@ SEQ - Set On Equal
dst.w = (src0.w == src1.w) ? 1 : 0
-SFL - Set On False
+.. opcode:: SFL - Set On False
-.. math::
+This instruction replicates its result.
- dst.x = 0
+.. math::
- dst.y = 0
+ dst = 0
- dst.z = 0
+.. note::
- dst.w = 0
+ Considered for removal.
-Considered for removal.
-SGT - Set On Greater Than
+.. opcode:: SGT - Set On Greater Than
.. math::
@@ -551,20 +529,16 @@ SGT - Set On Greater Than
dst.w = (src0.w > src1.w) ? 1 : 0
-SIN - Sine
+.. opcode:: SIN - Sine
-.. math::
+This instruction replicates its result.
- dst.x = \sin{src.x}
-
- dst.y = \sin{src.x}
-
- dst.z = \sin{src.x}
+.. math::
- dst.w = \sin{src.x}
+ dst = \sin{src.x}
-SLE - Set On Less Equal Than
+.. opcode:: SLE - Set On Less Equal Than
.. math::
@@ -577,7 +551,7 @@ SLE - Set On Less Equal Than
dst.w = (src0.w <= src1.w) ? 1 : 0
-SNE - Set On Not Equal
+.. opcode:: SNE - Set On Not Equal
.. math::
@@ -590,59 +564,63 @@ SNE - Set On Not Equal
dst.w = (src0.w != src1.w) ? 1 : 0
-STR - Set On True
+.. opcode:: STR - Set On True
-.. math::
+This instruction replicates its result.
- dst.x = 1
-
- dst.y = 1
-
- dst.z = 1
+.. math::
- dst.w = 1
+ dst = 1
-TEX - Texture Lookup
+.. opcode:: TEX - Texture Lookup
TBD
-TXD - Texture Lookup with Derivatives
+.. opcode:: TXD - Texture Lookup with Derivatives
TBD
-TXP - Projective Texture Lookup
+.. opcode:: TXP - Projective Texture Lookup
TBD
-UP2H - Unpack Two 16-Bit Floats
+.. opcode:: UP2H - Unpack Two 16-Bit Floats
TBD
- Considered for removal.
+.. note::
-UP2US - Unpack Two Unsigned 16-Bit Scalars
+ Considered for removal.
+
+.. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars
TBD
- Considered for removal.
+.. note::
+
+ Considered for removal.
-UP4B - Unpack Four Signed 8-Bit Values
+.. opcode:: UP4B - Unpack Four Signed 8-Bit Values
TBD
- Considered for removal.
+.. note::
+
+ Considered for removal.
-UP4UB - Unpack Four Unsigned 8-Bit Scalars
+.. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars
TBD
- Considered for removal.
+.. note::
-X2D - 2D Coordinate Transformation
+ Considered for removal.
+
+.. opcode:: X2D - 2D Coordinate Transformation
.. math::
@@ -654,20 +632,24 @@ X2D - 2D Coordinate Transformation
dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
-Considered for removal.
+.. note::
+
+ Considered for removal.
From GL_NV_vertex_program2
^^^^^^^^^^^^^^^^^^^^^^^^^^
-ARA - Address Register Add
+.. opcode:: ARA - Address Register Add
TBD
- Considered for removal.
+.. note::
-ARR - Address Register Load With Round
+ Considered for removal.
+
+.. opcode:: ARR - Address Register Load With Round
.. math::
@@ -680,26 +662,28 @@ ARR - Address Register Load With Round
dst.w = round(src.w)
-BRA - Branch
+.. opcode:: BRA - Branch
pc = target
- Considered for removal.
+.. note::
+
+ Considered for removal.
-CAL - Subroutine Call
+.. opcode:: CAL - Subroutine Call
push(pc)
pc = target
-RET - Subroutine Call Return
+.. opcode:: RET - Subroutine Call Return
pc = pop()
Potential restrictions:
* Only occurs at end of function.
-SSG - Set Sign
+.. opcode:: SSG - Set Sign
.. math::
@@ -712,7 +696,7 @@ SSG - Set Sign
dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
-CMP - Compare
+.. opcode:: CMP - Compare
.. math::
@@ -725,7 +709,7 @@ CMP - Compare
dst.w = (src0.w < 0) ? src1.w : src2.w
-KIL - Conditional Discard
+.. opcode:: KIL - Conditional Discard
.. math::
@@ -734,7 +718,7 @@ KIL - Conditional Discard
endif
-SCS - Sine Cosine
+.. opcode:: SCS - Sine Cosine
.. math::
@@ -747,12 +731,12 @@ SCS - Sine Cosine
dst.y = 1
-TXB - Texture Lookup With Bias
+.. opcode:: TXB - Texture Lookup With Bias
TBD
-NRM - 3-component Vector Normalise
+.. opcode:: NRM - 3-component Vector Normalise
.. math::
@@ -765,7 +749,7 @@ NRM - 3-component Vector Normalise
dst.w = 1
-DIV - Divide
+.. opcode:: DIV - Divide
.. math::
@@ -778,35 +762,31 @@ DIV - Divide
dst.w = \frac{src0.w}{src1.w}
-DP2 - 2-component Dot Product
-
-.. math::
-
- dst.x = src0.x \times src1.x + src0.y \times src1.y
+.. opcode:: DP2 - 2-component Dot Product
- dst.y = src0.x \times src1.x + src0.y \times src1.y
+This instruction replicates its result.
- dst.z = src0.x \times src1.x + src0.y \times src1.y
+.. math::
- dst.w = src0.x \times src1.x + src0.y \times src1.y
+ dst = src0.x \times src1.x + src0.y \times src1.y
-TXL - Texture Lookup With LOD
+.. opcode:: TXL - Texture Lookup With LOD
TBD
-BRK - Break
+.. opcode:: BRK - Break
TBD
-IF - If
+.. opcode:: IF - If
TBD
-BGNFOR - Begin a For-Loop
+.. opcode:: BGNFOR - Begin a For-Loop
dst.x = floor(src.x)
dst.y = floor(src.y)
@@ -819,25 +799,31 @@ BGNFOR - Begin a For-Loop
Note: The destination must be a loop register.
The source must be a constant register.
- Considered for cleanup / removal.
+.. note::
+ Considered for cleanup.
-REP - Repeat
+.. note::
+
+ Considered for removal.
+
+
+.. opcode:: REP - Repeat
TBD
-ELSE - Else
+.. opcode:: ELSE - Else
TBD
-ENDIF - End If
+.. opcode:: ENDIF - End If
TBD
-ENDFOR - End a For-Loop
+.. opcode:: ENDFOR - End a For-Loop
dst.x = dst.x + dst.z
dst.y = dst.y - 1.0
@@ -848,30 +834,48 @@ ENDFOR - End a For-Loop
Note: The destination must be a loop register.
- Considered for cleanup / removal.
+.. note::
+
+ Considered for cleanup.
+
+.. note::
-ENDREP - End Repeat
+ Considered for removal.
+
+.. opcode:: ENDREP - End Repeat
TBD
-PUSHA - Push Address Register On Stack
+.. opcode:: PUSHA - Push Address Register On Stack
push(src.x)
push(src.y)
push(src.z)
push(src.w)
- Considered for cleanup / removal.
+.. note::
+
+ Considered for cleanup.
+
+.. note::
-POPA - Pop Address Register From Stack
+ Considered for removal.
+
+.. opcode:: POPA - Pop Address Register From Stack
dst.w = pop()
dst.z = pop()
dst.y = pop()
dst.x = pop()
- Considered for cleanup / removal.
+.. note::
+
+ Considered for cleanup.
+
+.. note::
+
+ Considered for removal.
From GL_NV_gpu_program4
@@ -879,7 +883,7 @@ From GL_NV_gpu_program4
Support for these opcodes indicated by a special pipe capability bit (TBD).
-CEIL - Ceiling
+.. opcode:: CEIL - Ceiling
.. math::
@@ -892,7 +896,7 @@ CEIL - Ceiling
dst.w = \lceil src.w\rceil
-I2F - Integer To Float
+.. opcode:: I2F - Integer To Float
.. math::
@@ -905,7 +909,7 @@ I2F - Integer To Float
dst.w = (float) src.w
-NOT - Bitwise Not
+.. opcode:: NOT - Bitwise Not
.. math::
@@ -918,7 +922,7 @@ NOT - Bitwise Not
dst.w = ~src.w
-TRUNC - Truncate
+.. opcode:: TRUNC - Truncate
.. math::
@@ -931,7 +935,7 @@ TRUNC - Truncate
dst.w = trunc(src.w)
-SHL - Shift Left
+.. opcode:: SHL - Shift Left
.. math::
@@ -944,7 +948,7 @@ SHL - Shift Left
dst.w = src0.w << src1.x
-SHR - Shift Right
+.. opcode:: SHR - Shift Right
.. math::
@@ -957,7 +961,7 @@ SHR - Shift Right
dst.w = src0.w >> src1.x
-AND - Bitwise And
+.. opcode:: AND - Bitwise And
.. math::
@@ -970,7 +974,7 @@ AND - Bitwise And
dst.w = src0.w & src1.w
-OR - Bitwise Or
+.. opcode:: OR - Bitwise Or
.. math::
@@ -983,7 +987,7 @@ OR - Bitwise Or
dst.w = src0.w | src1.w
-MOD - Modulus
+.. opcode:: MOD - Modulus
.. math::
@@ -996,7 +1000,7 @@ MOD - Modulus
dst.w = src0.w \bmod src1.w
-XOR - Bitwise Xor
+.. opcode:: XOR - Bitwise Xor
.. math::
@@ -1009,7 +1013,7 @@ XOR - Bitwise Xor
dst.w = src0.w \oplus src1.w
-SAD - Sum Of Absolute Differences
+.. opcode:: SAD - Sum Of Absolute Differences
.. math::
@@ -1022,17 +1026,17 @@ SAD - Sum Of Absolute Differences
dst.w = |src0.w - src1.w| + src2.w
-TXF - Texel Fetch
+.. opcode:: TXF - Texel Fetch
TBD
-TXQ - Texture Size Query
+.. opcode:: TXQ - Texture Size Query
TBD
-CONT - Continue
+.. opcode:: CONT - Continue
TBD
@@ -1041,12 +1045,12 @@ From GL_NV_geometry_program4
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-EMIT - Emit
+.. opcode:: EMIT - Emit
TBD
-ENDPRIM - End Primitive
+.. opcode:: ENDPRIM - End Primitive
TBD
@@ -1055,66 +1059,64 @@ From GLSL
^^^^^^^^^^
-BGNLOOP - Begin a Loop
+.. opcode:: BGNLOOP - Begin a Loop
TBD
-BGNSUB - Begin Subroutine
+.. opcode:: BGNSUB - Begin Subroutine
TBD
-ENDLOOP - End a Loop
+.. opcode:: ENDLOOP - End a Loop
TBD
-ENDSUB - End Subroutine
+.. opcode:: ENDSUB - End Subroutine
TBD
-NOP - No Operation
+.. opcode:: NOP - No Operation
Do nothing.
-NRM4 - 4-component Vector Normalise
-
-.. math::
+.. opcode:: NRM4 - 4-component Vector Normalise
- dst.x = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+This instruction replicates its result.
- dst.y = \frac{src.y}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
-
- dst.z = \frac{src.z}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+.. math::
- dst.w = \frac{src.w}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
+ dst = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
ps_2_x
^^^^^^^^^^^^
-CALLNZ - Subroutine Call If Not Zero
+.. opcode:: CALLNZ - Subroutine Call If Not Zero
TBD
-IFC - If
+.. opcode:: IFC - If
TBD
-BREAKC - Break Conditional
+.. opcode:: BREAKC - Break Conditional
TBD
+.. _doubleopcodes:
+
Double Opcodes
^^^^^^^^^^^^^^^
-DADD - Add Double
+.. opcode:: DADD - Add Double
.. math::
@@ -1123,7 +1125,7 @@ DADD - Add Double
dst.zw = src0.zw + src1.zw
-DDIV - Divide Double
+.. opcode:: DDIV - Divide Double
.. math::
@@ -1131,7 +1133,7 @@ DDIV - Divide Double
dst.zw = src0.zw / src1.zw
-DSEQ - Set Double on Equal
+.. opcode:: DSEQ - Set Double on Equal
.. math::
@@ -1139,7 +1141,7 @@ DSEQ - Set Double on Equal
dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
-DSLT - Set Double on Less than
+.. opcode:: DSLT - Set Double on Less than
.. math::
@@ -1147,7 +1149,7 @@ DSLT - Set Double on Less than
dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
-DFRAC - Double Fraction
+.. opcode:: DFRAC - Double Fraction
.. math::
@@ -1156,7 +1158,7 @@ DFRAC - Double Fraction
dst.zw = src.zw - \lfloor src.zw\rfloor
-DFRACEXP - Convert Double Number to Fractional and Integral Components
+.. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components
.. math::
@@ -1164,7 +1166,7 @@ DFRACEXP - Convert Double Number to Fractional and Integral Components
dst0.zw = frexp(src.zw, dst1.zw)
-DLDEXP - Multiple Double Number by Integral Power of 2
+.. opcode:: DLDEXP - Multiple Double Number by Integral Power of 2
.. math::
@@ -1172,7 +1174,7 @@ DLDEXP - Multiple Double Number by Integral Power of 2
dst.zw = ldexp(src0.zw, src1.zw)
-DMIN - Minimum Double
+.. opcode:: DMIN - Minimum Double
.. math::
@@ -1180,7 +1182,7 @@ DMIN - Minimum Double
dst.zw = min(src0.zw, src1.zw)
-DMAX - Maximum Double
+.. opcode:: DMAX - Maximum Double
.. math::
@@ -1188,7 +1190,7 @@ DMAX - Maximum Double
dst.zw = max(src0.zw, src1.zw)
-DMUL - Multiply Double
+.. opcode:: DMUL - Multiply Double
.. math::
@@ -1197,7 +1199,7 @@ DMUL - Multiply Double
dst.zw = src0.zw \times src1.zw
-DMAD - Multiply And Add Doubles
+.. opcode:: DMAD - Multiply And Add Doubles
.. math::
@@ -1206,7 +1208,7 @@ DMAD - Multiply And Add Doubles
dst.zw = src0.zw \times src1.zw + src2.zw
-DRCP - Reciprocal Double
+.. opcode:: DRCP - Reciprocal Double
.. math::
@@ -1214,7 +1216,7 @@ DRCP - Reciprocal Double
dst.zw = \frac{1}{src.zw}
-DSQRT - Square root double
+.. opcode:: DSQRT - Square root double
.. math::
@@ -1269,20 +1271,8 @@ Keywords
discard Discard fragment.
- dst First destination register.
-
- dst0 First destination register.
-
pc Program counter.
- src First source register.
-
- src0 First source register.
-
- src1 Second source register.
-
- src2 Third source register.
-
target Label of target instruction.
@@ -1441,3 +1431,43 @@ GL_ARB_fragment_coord_conventions extension.
DirectX 9 uses INTEGER.
DirectX 10 uses HALF_INTEGER.
+
+
+
+Texture Sampling and Texture Formats
+------------------------------------
+
+This table shows how texture image components are returned as (x,y,z,w) tuples
+by TGSI texture instructions, such as :opcode:`TEX`, :opcode:`TXD`, and
+:opcode:`TXP`. For reference, OpenGL and Direct3D conventions are shown as
+well.
+
++--------------------+--------------+--------------------+--------------+
+| Texture Components | Gallium | OpenGL | Direct3D 9 |
++====================+==============+====================+==============+
+| R | XXX TBD | (r, 0, 0, 1) | (r, 1, 1, 1) |
++--------------------+--------------+--------------------+--------------+
+| RG | XXX TBD | (r, g, 0, 1) | (r, g, 1, 1) |
++--------------------+--------------+--------------------+--------------+
+| RGB | (r, g, b, 1) | (r, g, b, 1) | (r, g, b, 1) |
++--------------------+--------------+--------------------+--------------+
+| RGBA | (r, g, b, a) | (r, g, b, a) | (r, g, b, a) |
++--------------------+--------------+--------------------+--------------+
+| A | (0, 0, 0, a) | (0, 0, 0, a) | (0, 0, 0, a) |
++--------------------+--------------+--------------------+--------------+
+| L | (l, l, l, 1) | (l, l, l, 1) | (l, l, l, 1) |
++--------------------+--------------+--------------------+--------------+
+| LA | (l, l, l, a) | (l, l, l, a) | (l, l, l, a) |
++--------------------+--------------+--------------------+--------------+
+| I | (i, i, i, i) | (i, i, i, i) | N/A |
++--------------------+--------------+--------------------+--------------+
+| UV | XXX TBD | (0, 0, 0, 1) | (u, v, 1, 1) |
+| | | [#envmap-bumpmap]_ | |
++--------------------+--------------+--------------------+--------------+
+| Z | XXX TBD | (z, z, z, 1) | (0, z, 0, 1) |
+| | | [#depth-tex-mode]_ | |
++--------------------+--------------+--------------------+--------------+
+
+.. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
+.. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)
+ or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE.
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 0155b9be50..353ae176fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -65,7 +65,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
/* compute vertex layout now */
const struct lp_fragment_shader *lpfs = llvmpipe->fs;
struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
- const uint num = draw_current_shader_outputs(llvmpipe->draw);
+ const uint num = draw_num_shader_outputs(llvmpipe->draw);
uint i;
/* Tell draw_vbuf to simply emit the whole post-xform vertex
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 60ea9c171d..39bcdc8fe6 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -77,17 +77,21 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
static void find_output_registers(struct r300_fragment_program_compiler * compiler,
struct r300_fragment_shader * fs)
{
- unsigned i;
+ unsigned i, colorbuf_count = 0;
/* Mark the outputs as not present initially */
- compiler->OutputColor = fs->info.num_outputs;
+ compiler->OutputColor[0] = fs->info.num_outputs;
+ compiler->OutputColor[1] = fs->info.num_outputs;
+ compiler->OutputColor[2] = fs->info.num_outputs;
+ compiler->OutputColor[3] = fs->info.num_outputs;
compiler->OutputDepth = fs->info.num_outputs;
/* Now see where they really are. */
for(i = 0; i < fs->info.num_outputs; ++i) {
switch(fs->info.output_semantic_name[i]) {
case TGSI_SEMANTIC_COLOR:
- compiler->OutputColor = i;
+ compiler->OutputColor[colorbuf_count] = i;
+ colorbuf_count++;
break;
case TGSI_SEMANTIC_POSITION:
compiler->OutputDepth = i;
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 453fb1accc..b37be26133 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -37,24 +37,31 @@ unsigned r300_texture_get_stride(struct r300_screen* screen,
unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
unsigned zslice, unsigned face);
-/* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */
+/* Translate a pipe_format into a useful texture format for sampling.
+ *
+ * R300_EASY_TX_FORMAT swizzles the texture.
+ * Note the signature of R300_EASY_TX_FORMAT:
+ * R300_EASY_TX_FORMAT(B, G, R, A, FORMAT);
+ *
+ * The FORMAT specifies how the texture sampler will treat the texture, and
+ * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */
static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
{
switch (format) {
/* X8 */
case PIPE_FORMAT_A8_UNORM:
+ return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8);
case PIPE_FORMAT_I8_UNORM:
return R300_EASY_TX_FORMAT(X, X, X, X, X8);
case PIPE_FORMAT_L8_UNORM:
return R300_EASY_TX_FORMAT(X, X, X, ONE, X8);
/* X16 */
case PIPE_FORMAT_R16_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
return R300_EASY_TX_FORMAT(X, X, X, X, X16);
case PIPE_FORMAT_R16_SNORM:
return R300_EASY_TX_FORMAT(X, X, X, X, X16) |
R300_TX_FORMAT_SIGNED;
- case PIPE_FORMAT_Z16_UNORM:
- return R300_EASY_TX_FORMAT(X, X, X, X, X16);
/* Y8X8 */
case PIPE_FORMAT_A8L8_UNORM:
return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8);
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index a792c2cf98..941ec17016 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -201,6 +201,8 @@ static void transform_srcreg(
struct rc_src_register * dst,
struct tgsi_full_src_register * src)
{
+ unsigned i, j;
+
dst->File = translate_register_file(src->Register.File);
dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
dst->RelAddr = src->Register.Indirect;
@@ -210,6 +212,21 @@ static void transform_srcreg(
dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9;
dst->Abs = src->Register.Absolute;
dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0;
+
+ if (src->Register.File == TGSI_FILE_IMMEDIATE) {
+ for (i = 0; i < ttr->imms_to_swizzle_count; i++) {
+ if (ttr->imms_to_swizzle[i].index == src->Register.Index) {
+ dst->File = RC_FILE_TEMPORARY;
+ dst->Index = 0;
+ dst->Swizzle = 0;
+ for (j = 0; j < 4; j++) {
+ dst->Swizzle |= GET_SWZ(ttr->imms_to_swizzle[i].swizzle,
+ tgsi_util_get_full_src_register_swizzle(src, j)) << (j * 3);
+ }
+ break;
+ }
+ }
+ }
}
static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src,
@@ -277,21 +294,45 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst
&ttr->compiler->Program.ShadowSamplers);
}
-static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm)
+static void handle_immediate(struct tgsi_to_rc * ttr,
+ struct tgsi_full_immediate * imm,
+ unsigned index)
{
struct rc_constant constant;
- int i;
+ unsigned swizzle = 0;
+ boolean can_swizzle = TRUE;
+ unsigned i;
- constant.Type = RC_CONSTANT_IMMEDIATE;
- constant.Size = 4;
- for(i = 0; i < 4; ++i)
- constant.u.Immediate[i] = imm->u[i].Float;
- rc_constants_add(&ttr->compiler->Program.Constants, &constant);
+ for (i = 0; i < 4; i++) {
+ if (imm->u[i].Float == 0.0f) {
+ swizzle |= RC_SWIZZLE_ZERO << (i * 3);
+ } else if (imm->u[i].Float == 0.5f) {
+ swizzle |= RC_SWIZZLE_HALF << (i * 3);
+ } else if (imm->u[i].Float == 1.0f) {
+ swizzle |= RC_SWIZZLE_ONE << (i * 3);
+ } else {
+ can_swizzle = FALSE;
+ break;
+ }
+ }
+
+ if (can_swizzle) {
+ ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].index = index;
+ ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].swizzle = swizzle;
+ ttr->imms_to_swizzle_count++;
+ } else {
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 4;
+ for(i = 0; i < 4; ++i)
+ constant.u.Immediate[i] = imm->u[i].Float;
+ rc_constants_add(&ttr->compiler->Program.Constants, &constant);
+ }
}
void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens)
{
struct tgsi_parse_context parser;
+ unsigned imm_index = 0;
int i;
/* Allocate constants placeholders.
@@ -308,6 +349,9 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens)
ttr->immediate_offset = ttr->compiler->Program.Constants.Count;
+ ttr->imms_to_swizzle = malloc(ttr->info->immediate_count * sizeof(struct swizzled_imms));
+ ttr->imms_to_swizzle_count = 0;
+
tgsi_parse_init(&parser, tokens);
while (!tgsi_parse_end_of_tokens(&parser)) {
@@ -317,7 +361,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens)
case TGSI_TOKEN_TYPE_DECLARATION:
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- handle_immediate(ttr, &parser.FullToken.FullImmediate);
+ handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index);
+ imm_index++;
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
transform_instruction(ttr, &parser.FullToken.FullInstruction);
@@ -327,6 +372,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens)
tgsi_parse_free(&parser);
+ free(ttr->imms_to_swizzle);
+
rc_calculate_inputs_outputs(ttr->compiler);
}
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
index 93e90ec6d2..39b473c7bf 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
@@ -29,11 +29,18 @@ struct tgsi_full_declaration;
struct tgsi_shader_info;
struct tgsi_token;
+struct swizzled_imms {
+ unsigned index;
+ unsigned swizzle;
+};
+
struct tgsi_to_rc {
struct radeon_compiler * compiler;
const struct tgsi_shader_info * info;
int immediate_offset;
+ struct swizzled_imms * imms_to_swizzle;
+ unsigned imms_to_swizzle_count;
};
void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens);
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index f6856a5f69..d2eda7324c 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -30,7 +30,6 @@
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
-#include "draw/draw_private.h"
#include "sp_context.h"
#include "sp_screen.h"
#include "sp_state.h"
@@ -67,7 +66,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
/* compute vertex layout now */
const struct sp_fragment_shader *spfs = softpipe->fs;
struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
- const uint num = draw_current_shader_outputs(softpipe->draw);
+ const uint num = draw_num_shader_outputs(softpipe->draw);
uint i;
/* Tell draw_vbuf to simply emit the whole post-xform vertex
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index e88ef8d8fe..c4181c3f5b 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -215,7 +215,6 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
svga->state.hw_draw.num_views = 0;
svga->dirty = ~0;
- svga->state.white_fs_id = SVGA3D_INVALID_ID;
LIST_INITHEAD(&svga->dirty_buffers);
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 66259fd010..ba86256eb2 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -327,10 +327,6 @@ struct svga_context
unsigned texture_timestamp;
- /* Internally generated shaders:
- */
- unsigned white_fs_id;
-
/*
*/
struct svga_sw_state sw;
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index adc7120217..2973444d0a 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -108,70 +108,6 @@ fail:
return ret;
}
-/* The blend workaround for simulating logicop xor behaviour requires
- * that the incoming fragment color be white. This change achieves
- * that by hooking up a hard-wired fragment shader that just emits
- * color 1,1,1,1
- *
- * This is a slightly incomplete solution as it assumes that the
- * actual bound shader has no other effects beyond generating a
- * fragment color. In particular shaders containing TEXKIL and/or
- * depth-write will not have the correct behaviour, nor will those
- * expecting to use alphatest.
- *
- * These are avoidable issues, but they are not much worse than the
- * unavoidable ones associated with this technique, so it's not clear
- * how much effort should be expended trying to resolve them - the
- * ultimate result will still not be correct in most cases.
- *
- * Shader below was generated with:
- * SVGA_DEBUG=tgsi ./mesa/progs/fp/fp-tri white.txt
- */
-static int emit_white_fs( struct svga_context *svga )
-{
- int ret = PIPE_ERROR;
-
- /* ps_3_0
- * def c0, 1.000000, 0.000000, 0.000000, 1.000000
- * mov oC0, c0.x
- * end
- */
- static const unsigned white_tokens[] = {
- 0xffff0300,
- 0x05000051,
- 0xa00f0000,
- 0x3f800000,
- 0x00000000,
- 0x00000000,
- 0x3f800000,
- 0x02000001,
- 0x800f0800,
- 0xa0000000,
- 0x0000ffff,
- };
-
- assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX);
- svga->state.white_fs_id = util_bitmask_add(svga->fs_bm);
- if(svga->state.white_fs_id == SVGA3D_INVALID_ID)
- goto no_fs_id;
-
- ret = SVGA3D_DefineShader(svga->swc,
- svga->state.white_fs_id,
- SVGA3D_SHADERTYPE_PS,
- white_tokens,
- sizeof(white_tokens));
- if (ret)
- goto no_definition;
-
- return 0;
-
-no_definition:
- util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id);
- svga->state.white_fs_id = SVGA3D_INVALID_ID;
-no_fs_id:
- return ret;
-}
-
/* SVGA_NEW_TEXTURE_BINDING
* SVGA_NEW_RAST
@@ -199,6 +135,23 @@ static int make_fs_key( const struct svga_context *svga,
PIPE_WINDING_CW);
}
+ /* The blend workaround for simulating logicop xor behaviour
+ * requires that the incoming fragment color be white. This change
+ * achieves that by creating a varient of the current fragment
+ * shader that overrides all output colors with 1,1,1,1
+ *
+ * This will work for most shaders, including those containing
+ * TEXKIL and/or depth-write. However, it will break on the
+ * combination of xor-logicop plus alphatest.
+ *
+ * Ultimately, we could implement alphatest in the shader using
+ * texkil prior to overriding the outgoing fragment color.
+ *
+ * SVGA_NEW_BLEND
+ */
+ if (svga->curr.blend->need_white_fragments) {
+ key->white_fragments = 1;
+ }
/* XXX: want to limit this to the textures that the shader actually
* refers to.
@@ -238,40 +191,29 @@ static int emit_hw_fs( struct svga_context *svga,
unsigned id = SVGA3D_INVALID_ID;
int ret = 0;
+ struct svga_fragment_shader *fs = svga->curr.fs;
+ struct svga_fs_compile_key key;
+
/* SVGA_NEW_BLEND
+ * SVGA_NEW_TEXTURE_BINDING
+ * SVGA_NEW_RAST
+ * SVGA_NEW_NEED_SWTNL
+ * SVGA_NEW_SAMPLER
*/
- if (svga->curr.blend->need_white_fragments) {
- if (svga->state.white_fs_id == SVGA3D_INVALID_ID) {
- ret = emit_white_fs( svga );
- if (ret)
- return ret;
- }
- id = svga->state.white_fs_id;
- }
- else {
- struct svga_fragment_shader *fs = svga->curr.fs;
- struct svga_fs_compile_key key;
-
- /* SVGA_NEW_TEXTURE_BINDING
- * SVGA_NEW_RAST
- * SVGA_NEW_NEED_SWTNL
- * SVGA_NEW_SAMPLER
- */
- ret = make_fs_key( svga, &key );
+ ret = make_fs_key( svga, &key );
+ if (ret)
+ return ret;
+
+ result = search_fs_key( fs, &key );
+ if (!result) {
+ ret = compile_fs( svga, fs, &key, &result );
if (ret)
return ret;
-
- result = search_fs_key( fs, &key );
- if (!result) {
- ret = compile_fs( svga, fs, &key, &result );
- if (ret)
- return ret;
- }
-
- assert (result);
- id = result->id;
}
+ assert (result);
+ id = result->id;
+
assert(id != SVGA3D_INVALID_ID);
if (result != svga->state.hw_draw.fs) {
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index 737a2213af..063c9cf422 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -49,6 +49,7 @@ struct svga_fs_compile_key
{
unsigned light_twoside:1;
unsigned front_cw:1;
+ unsigned white_fragments:1;
unsigned num_textures:8;
unsigned num_unnormalized_coords:8;
struct {
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
index 43fc0d3235..73102a72a8 100644
--- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@@ -194,8 +194,19 @@ static boolean ps30_output( struct svga_shader_emitter *emit,
switch (semantic.Name) {
case TGSI_SEMANTIC_COLOR:
- emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT,
- semantic.Index );
+ if (emit->unit == PIPE_SHADER_FRAGMENT &&
+ emit->key.fkey.white_fragments) {
+
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+ emit->temp_col[idx] = emit->output_map[idx];
+ emit->true_col[idx] = dst_register( SVGA3DREG_COLOROUT,
+ semantic.Index );
+ }
+ else {
+ emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT,
+ semantic.Index );
+ }
break;
case TGSI_SEMANTIC_POSITION:
emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 2557824293..e8f75485d5 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -79,6 +79,8 @@ struct svga_shader_emitter
int ps30_input_count;
+ int dynamic_branching_level;
+
boolean in_main_func;
boolean created_zero_immediate;
@@ -199,6 +201,23 @@ static INLINE boolean emit_op3( struct svga_shader_emitter *emit,
}
+static INLINE boolean emit_op4( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken inst,
+ SVGA3dShaderDestToken dest,
+ struct src_register src0,
+ struct src_register src1,
+ struct src_register src2,
+ struct src_register src3)
+{
+ return (emit_instruction( emit, inst ) &&
+ emit_dst( emit, dest ) &&
+ emit_src( emit, src0 ) &&
+ emit_src( emit, src1 ) &&
+ emit_src( emit, src2 ) &&
+ emit_src( emit, src3 ));
+}
+
+
#define TRANSLATE_SWIZZLE(x,y,z,w) ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6))
#define SWIZZLE_XYZW \
TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_W)
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index dc5eb8fc60..be821e9821 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -46,8 +46,6 @@ translate_opcode(
case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC;
- case TGSI_OPCODE_DDX: return SVGA3DOP_DSX;
- case TGSI_OPCODE_DDY: return SVGA3DOP_DSY;
case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
@@ -415,6 +413,88 @@ static boolean submit_op3( struct svga_shader_emitter *emit,
}
+
+
+/* SVGA shaders may not refer to >1 constant register in a single
+ * instruction. This function checks for that usage and inserts a
+ * move to temporary if detected.
+ */
+static boolean submit_op4( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken inst,
+ SVGA3dShaderDestToken dest,
+ struct src_register src0,
+ struct src_register src1,
+ struct src_register src2,
+ struct src_register src3)
+{
+ SVGA3dShaderDestToken temp0;
+ SVGA3dShaderDestToken temp3;
+ boolean need_temp0 = FALSE;
+ boolean need_temp3 = FALSE;
+ SVGA3dShaderRegType type0, type1, type2, type3;
+
+ temp0.value = 0;
+ temp3.value = 0;
+ type0 = SVGA3dShaderGetRegType( src0.base.value );
+ type1 = SVGA3dShaderGetRegType( src1.base.value );
+ type2 = SVGA3dShaderGetRegType( src2.base.value );
+ type3 = SVGA3dShaderGetRegType( src2.base.value );
+
+ /* Make life a little easier - this is only used by the TXD
+ * instruction which is guaranteed not to have a constant/input reg
+ * in one slot at least:
+ */
+ assert(type1 == SVGA3DREG_SAMPLER);
+
+ if (type0 == SVGA3DREG_CONST &&
+ ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
+ (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
+ need_temp0 = TRUE;
+
+ if (type3 == SVGA3DREG_CONST &&
+ (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
+ need_temp3 = TRUE;
+
+ if (type0 == SVGA3DREG_INPUT &&
+ ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
+ (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
+ need_temp0 = TRUE;
+
+ if (type3 == SVGA3DREG_INPUT &&
+ (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
+ need_temp3 = TRUE;
+
+ if (need_temp0)
+ {
+ temp0 = get_temp( emit );
+
+ if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
+ return FALSE;
+
+ src0 = src( temp0 );
+ }
+
+ if (need_temp3)
+ {
+ temp3 = get_temp( emit );
+
+ if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 ))
+ return FALSE;
+
+ src3 = src( temp3 );
+ }
+
+ if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
+ return FALSE;
+
+ if (need_temp3)
+ release_temp( emit, temp3 );
+ if (need_temp0)
+ release_temp( emit, temp0 );
+ return TRUE;
+}
+
+
static boolean emit_def_const( struct svga_shader_emitter *emit,
SVGA3dShaderConstType type,
unsigned idx,
@@ -660,6 +740,8 @@ static boolean emit_if(struct svga_shader_emitter *emit,
if_token.control = SVGA3DOPCOMPC_NE;
zero = scalar(zero, TGSI_SWIZZLE_X);
+ emit->dynamic_branching_level++;
+
return (emit_instruction( emit, if_token ) &&
emit_src( emit, src ) &&
emit_src( emit, zero ) );
@@ -668,6 +750,8 @@ static boolean emit_if(struct svga_shader_emitter *emit,
static boolean emit_endif(struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn)
{
+ emit->dynamic_branching_level--;
+
return (emit_instruction( emit,
inst_token( SVGA3DOP_ENDIF )));
}
@@ -1011,10 +1095,10 @@ static boolean emit_kilp(struct svga_shader_emitter *emit,
{
SVGA3dShaderInstToken inst;
SVGA3dShaderDestToken temp;
- struct src_register one = get_zero_immediate( emit );
+ struct src_register one = scalar( get_zero_immediate( emit ),
+ TGSI_SWIZZLE_W );
inst = inst_token( SVGA3DOP_TEXKILL );
- one = scalar( one, TGSI_SWIZZLE_W );
/* texkill doesn't allow negation on the operand so lets move
* negation of {1} to a temp register */
@@ -1169,41 +1253,79 @@ static boolean emit_tex2(struct svga_shader_emitter *emit,
SVGA3dShaderDestToken dst )
{
SVGA3dShaderInstToken inst;
- struct src_register src0;
- struct src_register src1;
-
+ struct src_register texcoord;
+ struct src_register sampler;
+ SVGA3dShaderDestToken tmp;
+
inst.value = 0;
- inst.op = SVGA3DOP_TEX;
switch (insn->Instruction.Opcode) {
case TGSI_OPCODE_TEX:
+ inst.op = SVGA3DOP_TEX;
break;
case TGSI_OPCODE_TXP:
+ inst.op = SVGA3DOP_TEX;
inst.control = SVGA3DOPCONT_PROJECT;
break;
case TGSI_OPCODE_TXB:
+ inst.op = SVGA3DOP_TEX;
inst.control = SVGA3DOPCONT_BIAS;
break;
+ case TGSI_OPCODE_TXL:
+ inst.op = SVGA3DOP_TEXLDL;
+ break;
default:
assert(0);
return FALSE;
}
- src0 = translate_src_register( emit, &insn->Src[0] );
- src1 = translate_src_register( emit, &insn->Src[1] );
+ texcoord = translate_src_register( emit, &insn->Src[0] );
+ sampler = translate_src_register( emit, &insn->Src[1] );
- if (emit->key.fkey.tex[src1.base.num].unnormalized) {
- struct src_register wh = get_tex_dimensions( emit, src1.base.num );
- SVGA3dShaderDestToken tmp = get_temp( emit );
+ if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
+ emit->dynamic_branching_level > 0)
+ tmp = get_temp( emit );
+
+ /* Can't do mipmapping inside dynamic branch constructs. Force LOD
+ * zero in that case.
+ */
+ if (emit->dynamic_branching_level > 0 &&
+ inst.op == SVGA3DOP_TEX &&
+ SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
+ struct src_register zero = get_zero_immediate( emit );
+
+ /* MOV tmp, texcoord */
+ if (!submit_op1( emit,
+ inst_token( SVGA3DOP_MOV ),
+ tmp,
+ texcoord ))
+ return FALSE;
+
+ /* MOV tmp.w, zero */
+ if (!submit_op1( emit,
+ inst_token( SVGA3DOP_MOV ),
+ writemask( tmp, TGSI_WRITEMASK_W ),
+ scalar( zero, TGSI_SWIZZLE_X )))
+ return FALSE;
+
+ texcoord = src( tmp );
+ inst.op = SVGA3DOP_TEXLDL;
+ }
+
+ /* Explicit normalization of texcoords:
+ */
+ if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
+ struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
/* MUL tmp, SRC0, WH */
if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
- tmp, src0, wh ))
+ tmp, texcoord, wh ))
return FALSE;
- src0 = src( tmp );
+
+ texcoord = src( tmp );
}
- return submit_op2( emit, inst, dst, src0, src1 );
+ return submit_op2( emit, inst, dst, texcoord, sampler );
}
@@ -1211,31 +1333,33 @@ static boolean emit_tex2(struct svga_shader_emitter *emit,
/* Translate texture instructions to SVGA3D representation.
*/
-static boolean emit_tex3(struct svga_shader_emitter *emit,
+static boolean emit_tex4(struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn,
SVGA3dShaderDestToken dst )
{
SVGA3dShaderInstToken inst;
- struct src_register src0;
- struct src_register src1;
- struct src_register src2;
+ struct src_register texcoord;
+ struct src_register ddx;
+ struct src_register ddy;
+ struct src_register sampler;
+
+ texcoord = translate_src_register( emit, &insn->Src[0] );
+ ddx = translate_src_register( emit, &insn->Src[1] );
+ ddy = translate_src_register( emit, &insn->Src[2] );
+ sampler = translate_src_register( emit, &insn->Src[3] );
inst.value = 0;
switch (insn->Instruction.Opcode) {
case TGSI_OPCODE_TXD:
- inst.op = SVGA3DOP_TEXLDD;
- break;
- case TGSI_OPCODE_TXL:
- inst.op = SVGA3DOP_TEXLDL;
+ inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
break;
+ default:
+ assert(0);
+ return FALSE;
}
- src0 = translate_src_register( emit, &insn->Src[0] );
- src1 = translate_src_register( emit, &insn->Src[1] );
- src2 = translate_src_register( emit, &insn->Src[2] );
-
- return submit_op3( emit, inst, dst, src0, src1, src2 );
+ return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
}
@@ -1271,12 +1395,12 @@ static boolean emit_tex(struct svga_shader_emitter *emit,
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_TXL:
if (!emit_tex2( emit, insn, tex_result ))
return FALSE;
break;
- case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXD:
- if (!emit_tex3( emit, insn, tex_result ))
+ if (!emit_tex4( emit, insn, tex_result ))
return FALSE;
break;
default:
@@ -1330,6 +1454,8 @@ static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
struct src_register const_int = get_loop_const( emit );
+ emit->dynamic_branching_level++;
+
return (emit_instruction( emit, inst ) &&
emit_src( emit, loop_reg ) &&
emit_src( emit, const_int ) );
@@ -1339,6 +1465,9 @@ static boolean emit_endloop2( struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn )
{
SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
+
+ emit->dynamic_branching_level--;
+
return emit_instruction( emit, inst );
}
@@ -1398,6 +1527,46 @@ static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
}
}
+
+static boolean emit_deriv(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ if (emit->dynamic_branching_level > 0 &&
+ insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
+ {
+ struct src_register zero = get_zero_immediate( emit );
+ SVGA3dShaderDestToken dst =
+ translate_dst_register( emit, insn, 0 );
+
+ /* Deriv opcodes not valid inside dynamic branching, workaround
+ * by zeroing out the destination.
+ */
+ if (!submit_op1(emit,
+ inst_token( SVGA3DOP_MOV ),
+ dst,
+ scalar(zero, TGSI_SWIZZLE_X)))
+ return FALSE;
+
+ return TRUE;
+ }
+ else {
+ unsigned opcode;
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_DDX:
+ opcode = SVGA3DOP_DSX;
+ break;
+ case TGSI_OPCODE_DDY:
+ opcode = SVGA3DOP_DSY;
+ break;
+ default:
+ return FALSE;
+ }
+
+ return emit_simple_instruction( emit, opcode, insn );
+ }
+}
+
static boolean emit_arl(struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn)
{
@@ -2002,6 +2171,10 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
case TGSI_OPCODE_TXD:
return emit_tex( emit, insn );
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ return emit_deriv( emit, insn );
+
case TGSI_OPCODE_BGNSUB:
return emit_bgnsub( emit, position, insn );
@@ -2254,11 +2427,28 @@ static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
- if (!submit_op1( emit,
- inst_token(SVGA3DOP_MOV),
- emit->true_col[i],
- src(emit->temp_col[i]) ))
- return FALSE;
+ /* Potentially override output colors with white for XOR
+ * logicop workaround.
+ */
+ if (emit->unit == PIPE_SHADER_FRAGMENT &&
+ emit->key.fkey.white_fragments) {
+
+ struct src_register one = scalar( get_zero_immediate( emit ),
+ TGSI_SWIZZLE_W );
+
+ if (!submit_op1( emit,
+ inst_token(SVGA3DOP_MOV),
+ emit->true_col[i],
+ one ))
+ return FALSE;
+ }
+ else {
+ if (!submit_op1( emit,
+ inst_token(SVGA3DOP_MOV),
+ emit->true_col[i],
+ src(emit->temp_col[i]) ))
+ return FALSE;
+ }
}
}
@@ -2467,6 +2657,9 @@ needs_to_create_zero( struct svga_shader_emitter *emit )
if (emit->key.fkey.light_twoside)
return TRUE;
+ if (emit->key.fkey.white_fragments)
+ return TRUE;
+
if (emit->emit_frontface)
return TRUE;
@@ -2476,6 +2669,10 @@ needs_to_create_zero( struct svga_shader_emitter *emit )
}
if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_BGNFOR] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
@@ -2702,6 +2899,8 @@ boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
goto done;
}
+ assert(emit->dynamic_branching_level == 0);
+
/* Need to terminate the whole shader:
*/
ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c
index 8a73e81d4a..30e2c347bd 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.c
@@ -1087,8 +1087,8 @@ egl_g3d_bind_tex_image(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLSurface *surf, EGLint buffer)
{
struct egl_g3d_surface *gsurf = egl_g3d_surface(surf);
- _EGLContext *ctx = _eglGetAPIContext(EGL_OPENGL_ES_API);
- struct egl_g3d_context *gctx = egl_g3d_context(ctx);
+ _EGLContext *es1 = _eglGetAPIContext(EGL_OPENGL_ES_API);
+ struct egl_g3d_context *gctx;
enum pipe_format target_format;
int target;
@@ -1118,6 +1118,11 @@ egl_g3d_bind_tex_image(_EGLDriver *drv, _EGLDisplay *dpy,
return _eglError(EGL_BAD_MATCH, "eglBindTexImage");
}
+ if (!es1)
+ return EGL_TRUE;
+ if (!gsurf->render_surface)
+ return EGL_FALSE;
+
/* flush properly if the surface is bound */
if (gsurf->base.CurrentContext) {
gctx = egl_g3d_context(gsurf->base.CurrentContext);
@@ -1125,14 +1130,11 @@ egl_g3d_bind_tex_image(_EGLDriver *drv, _EGLDisplay *dpy,
PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL);
}
- if (gctx) {
- if (!gsurf->render_surface)
- return EGL_FALSE;
+ gctx = egl_g3d_context(es1);
+ gctx->stapi->st_bind_texture_surface(gsurf->render_surface,
+ target, gsurf->base.MipmapLevel, target_format);
- gctx->stapi->st_bind_texture_surface(gsurf->render_surface,
- target, gsurf->base.MipmapLevel, target_format);
- gsurf->base.BoundToTexture = EGL_TRUE;
- }
+ gsurf->base.BoundToTexture = EGL_TRUE;
return EGL_TRUE;
}
diff --git a/src/gallium/state_trackers/vega/asm_fill.h b/src/gallium/state_trackers/vega/asm_fill.h
index 2f394ad6c5..27773467fa 100644
--- a/src/gallium/state_trackers/vega/asm_fill.h
+++ b/src/gallium/state_trackers/vega/asm_fill.h
@@ -27,166 +27,375 @@
#ifndef ASM_FILL_H
#define ASM_FILL_H
-static const char solid_fill_asm[] =
- "MOV %s, CONST[0]\n";
-
-
-static const char linear_grad_asm[] =
- "MOV TEMP[0].xy, IN[0]\n"
- "MOV TEMP[0].z, CONST[1].yyyy\n"
- "DP3 TEMP[1], CONST[2], TEMP[0]\n"
- "DP3 TEMP[2], CONST[3], TEMP[0]\n"
- "DP3 TEMP[3], CONST[4], TEMP[0]\n"
- "RCP TEMP[3], TEMP[3]\n"
- "MUL TEMP[1], TEMP[1], TEMP[3]\n"
- "MUL TEMP[2], TEMP[2], TEMP[3]\n"
- "MOV TEMP[4].x, TEMP[1]\n"
- "MOV TEMP[4].y, TEMP[2]\n"
- "MUL TEMP[0], CONST[0].yyyy, TEMP[4].yyyy\n"
- "MAD TEMP[1], CONST[0].xxxx, TEMP[4].xxxx, TEMP[0]\n"
- "MUL TEMP[2], TEMP[1], CONST[0].zzzz\n"
- "TEX %s, TEMP[2], SAMP[0], 1D\n";
-
-static const char radial_grad_asm[] =
- "MOV TEMP[0].xy, IN[0]\n"
- "MOV TEMP[0].z, CONST[1].yyyy\n"
- "DP3 TEMP[1], CONST[2], TEMP[0]\n"
- "DP3 TEMP[2], CONST[3], TEMP[0]\n"
- "DP3 TEMP[3], CONST[4], TEMP[0]\n"
- "RCP TEMP[3], TEMP[3]\n"
- "MUL TEMP[1], TEMP[1], TEMP[3]\n"
- "MUL TEMP[2], TEMP[2], TEMP[3]\n"
- "MOV TEMP[5].x, TEMP[1]\n"
- "MOV TEMP[5].y, TEMP[2]\n"
- "MUL TEMP[0], CONST[0].yyyy, TEMP[5].yyyy\n"
- "MAD TEMP[1], CONST[0].xxxx, TEMP[5].xxxx, TEMP[0]\n"
- "ADD TEMP[1], TEMP[1], TEMP[1]\n"
- "MUL TEMP[3], TEMP[5].yyyy, TEMP[5].yyyy\n"
- "MAD TEMP[4], TEMP[5].xxxx, TEMP[5].xxxx, TEMP[3]\n"
- "MOV TEMP[4], -TEMP[4]\n"
- "MUL TEMP[2], CONST[0].zzzz, TEMP[4]\n"
- "MUL TEMP[0], CONST[1].wwww, TEMP[2]\n"
- "MUL TEMP[3], TEMP[1], TEMP[1]\n"
- "SUB TEMP[2], TEMP[3], TEMP[0]\n"
- "RSQ TEMP[2], |TEMP[2]|\n"
- "RCP TEMP[2], TEMP[2]\n"
- "SUB TEMP[1], TEMP[2], TEMP[1]\n"
- "ADD TEMP[0], CONST[0].zzzz, CONST[0].zzzz\n"
- "RCP TEMP[0], TEMP[0]\n"
- "MUL TEMP[2], TEMP[1], TEMP[0]\n"
- "TEX %s, TEMP[2], SAMP[0], 1D\n";
-
-static const char pattern_asm[] =
- "MOV TEMP[0].xy, IN[0]\n"
- "MOV TEMP[0].z, CONST[1].yyyy\n"
- "DP3 TEMP[1], CONST[2], TEMP[0]\n"
- "DP3 TEMP[2], CONST[3], TEMP[0]\n"
- "DP3 TEMP[3], CONST[4], TEMP[0]\n"
- "RCP TEMP[3], TEMP[3]\n"
- "MUL TEMP[1], TEMP[1], TEMP[3]\n"
- "MUL TEMP[2], TEMP[2], TEMP[3]\n"
- "MOV TEMP[4].x, TEMP[1]\n"
- "MOV TEMP[4].y, TEMP[2]\n"
- "RCP TEMP[0], CONST[1].zwzw\n"
- "MOV TEMP[1], TEMP[4]\n"
- "MUL TEMP[1].x, TEMP[1], TEMP[0]\n"
- "MUL TEMP[1].y, TEMP[1], TEMP[0]\n"
- "TEX %s, TEMP[1], SAMP[0], 2D\n";
-
-
-static const char mask_asm[] =
- "TEX TEMP[1], IN[0], SAMP[1], 2D\n"
- "MUL TEMP[0].w, TEMP[0].wwww, TEMP[1].wwww\n"
- "MOV %s, TEMP[0]\n";
-
-
-static const char image_normal_asm[] =
- "TEX %s, IN[1], SAMP[3], 2D\n";
-
-static const char image_multiply_asm[] =
- "TEX TEMP[1], IN[1], SAMP[3], 2D\n"
- "MUL %s, TEMP[0], TEMP[1]\n";
-
-static const char image_stencil_asm[] =
- "TEX TEMP[1], IN[1], SAMP[3], 2D\n"
- "MUL %s, TEMP[0], TEMP[1]\n";
-
-
-#define EXTENDED_BLEND_OVER \
- "SUB TEMP[3], CONST[1].yyyy, TEMP[1].wwww\n" \
- "SUB TEMP[4], CONST[1].yyyy, TEMP[0].wwww\n" \
- "MUL TEMP[3], TEMP[0], TEMP[3]\n" \
- "MUL TEMP[4], TEMP[1], TEMP[4]\n" \
- "ADD TEMP[3], TEMP[3], TEMP[4]\n"
-
-static const char blend_multiply_asm[] =
- "TEX TEMP[1], IN[0], SAMP[2], 2D\n"
- EXTENDED_BLEND_OVER
- "MUL TEMP[4], TEMP[0], TEMP[1]\n"
- "ADD TEMP[1], TEMP[4], TEMP[3]\n"/*result.rgb*/
- "MUL TEMP[2], TEMP[0].wwww, TEMP[1].wwww\n"
- "ADD TEMP[3], TEMP[0].wwww, TEMP[1].wwww\n"
- "SUB TEMP[1].w, TEMP[3], TEMP[2]\n"
- "MOV %s, TEMP[1]\n";
-#if 1
-static const char blend_screen_asm[] =
- "TEX TEMP[1], IN[0], SAMP[2], 2D\n"
- "ADD TEMP[3], TEMP[0], TEMP[1]\n"
- "MUL TEMP[2], TEMP[0], TEMP[1]\n"
- "SUB %s, TEMP[3], TEMP[2]\n";
-#else
-static const char blend_screen_asm[] =
- "TEX TEMP[1], IN[0], SAMP[2], 2D\n"
- "MOV %s, TEMP[1]\n";
-#endif
-
-static const char blend_darken_asm[] =
- "TEX TEMP[1], IN[0], SAMP[2], 2D\n"
- EXTENDED_BLEND_OVER
- "MUL TEMP[4], TEMP[0], TEMP[1].wwww\n"
- "MUL TEMP[5], TEMP[1], TEMP[0].wwww\n"
- "MIN TEMP[4], TEMP[4], TEMP[5]\n"
- "ADD TEMP[1], TEMP[3], TEMP[4]\n"
- "MUL TEMP[2], TEMP[0].wwww, TEMP[1].wwww\n"
- "ADD TEMP[3], TEMP[0].wwww, TEMP[1].wwww\n"
- "SUB TEMP[1].w, TEMP[3], TEMP[2]\n"
- "MOV %s, TEMP[1]\n";
-
-static const char blend_lighten_asm[] =
- "TEX TEMP[1], IN[0], SAMP[2], 2D\n"
- EXTENDED_BLEND_OVER
- "MUL TEMP[4], TEMP[0], TEMP[1].wwww\n"
- "MUL TEMP[5], TEMP[1], TEMP[0].wwww\n"
- "MAX TEMP[4], TEMP[4], TEMP[5]\n"
- "ADD TEMP[1], TEMP[3], TEMP[4]\n"
- "MUL TEMP[2], TEMP[0].wwww, TEMP[1].wwww\n"
- "ADD TEMP[3], TEMP[0].wwww, TEMP[1].wwww\n"
- "SUB TEMP[1].w, TEMP[3], TEMP[2]\n"
- "MOV %s, TEMP[1]\n";
-
-
-static const char premultiply_asm[] =
- "MUL TEMP[0].xyz, TEMP[0], TEMP[0].wwww\n";
-
-static const char unpremultiply_asm[] =
- "TEX TEMP[0], IN[0], SAMP[1], 2D\n";
-
-
-static const char color_bw_asm[] =
- "ADD TEMP[1], CONST[1].yyyy, CONST[1].yyyy\n"
- "RCP TEMP[2], TEMP[1]\n"
- "ADD TEMP[1], CONST[1].yyyy, TEMP[2]\n"
- "ADD TEMP[2].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
- "ADD TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx\n"
- "SGE TEMP[0].xyz, TEMP[2].xxxx, TEMP[1]\n"
- "SGE TEMP[0].w, TEMP[0].wwww, TEMP[2].yyyy\n"
- "MOV %s, TEMP[0]\n";
+#include "tgsi/tgsi_ureg.h"
+
+typedef void (* ureg_func)( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant);
+
+static INLINE void
+solid_fill( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_MOV(ureg, *out, constant[0]);
+}
+
+static INLINE void
+linear_grad( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+
+ ureg_MOV(ureg,
+ ureg_writemask(temp[0], TGSI_WRITEMASK_XY),
+ in[0]);
+ ureg_MOV(ureg,
+ ureg_writemask(temp[0], TGSI_WRITEMASK_Z),
+ ureg_scalar(constant[1], TGSI_SWIZZLE_Y));
+ ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0]));
+ ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0]));
+ ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0]));
+ ureg_RCP(ureg, temp[3], ureg_src(temp[3]));
+ ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3]));
+ ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3]));
+ ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_X), ureg_src(temp[1]));
+ ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_Y), ureg_src(temp[2]));
+ ureg_MUL(ureg, temp[0],
+ ureg_scalar(constant[0], TGSI_SWIZZLE_Y),
+ ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_Y));
+ ureg_MAD(ureg, temp[1],
+ ureg_scalar(constant[0], TGSI_SWIZZLE_X),
+ ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_X),
+ ureg_src(temp[0]));
+ ureg_MUL(ureg, temp[2], ureg_src(temp[1]),
+ ureg_scalar(constant[0], TGSI_SWIZZLE_Z));
+ ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]);
+}
+
+static INLINE void
+radial_grad( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+
+ ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_XY), in[0]);
+ ureg_MOV(ureg,
+ ureg_writemask(temp[0], TGSI_WRITEMASK_Z),
+ ureg_scalar(constant[1], TGSI_SWIZZLE_Y));
+ ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0]));
+ ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0]));
+ ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0]));
+ ureg_RCP(ureg, temp[3], ureg_src(temp[3]));
+ ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3]));
+ ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3]));
+ ureg_MOV(ureg, ureg_writemask(temp[5], TGSI_WRITEMASK_X), ureg_src(temp[1]));
+ ureg_MOV(ureg, ureg_writemask(temp[5], TGSI_WRITEMASK_Y), ureg_src(temp[2]));
+ ureg_MUL(ureg, temp[0], ureg_scalar(constant[0], TGSI_SWIZZLE_Y),
+ ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y));
+ ureg_MAD(ureg, temp[1],
+ ureg_scalar(constant[0], TGSI_SWIZZLE_X),
+ ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), ureg_src(temp[0]));
+ ureg_ADD(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[1]));
+ ureg_MUL(ureg, temp[3],
+ ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y),
+ ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y));
+ ureg_MAD(ureg, temp[4],
+ ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X),
+ ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X),
+ ureg_src(temp[3]));
+ ureg_MOV(ureg, temp[4], ureg_negate(ureg_src(temp[4])));
+ ureg_MUL(ureg, temp[2],
+ ureg_scalar(constant[0], TGSI_SWIZZLE_Z),
+ ureg_src(temp[4]));
+ ureg_MUL(ureg, temp[0],
+ ureg_scalar(constant[1], TGSI_SWIZZLE_W),
+ ureg_src(temp[2]));
+ ureg_MUL(ureg, temp[3], ureg_src(temp[1]), ureg_src(temp[1]));
+
+ ureg_SUB(ureg, temp[2], ureg_src(temp[3]), ureg_src(temp[0]));
+ ureg_RSQ(ureg, temp[2], ureg_abs(ureg_src(temp[2])));
+ ureg_RCP(ureg, temp[2], ureg_src(temp[2]));
+ ureg_SUB(ureg, temp[1], ureg_src(temp[2]), ureg_src(temp[1]));
+ ureg_ADD(ureg, temp[0],
+ ureg_scalar(constant[0], TGSI_SWIZZLE_Z),
+ ureg_scalar(constant[0], TGSI_SWIZZLE_Z));
+ ureg_RCP(ureg, temp[0], ureg_src(temp[0]));
+ ureg_MUL(ureg, temp[2], ureg_src(temp[1]), ureg_src(temp[0]));
+ ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]);
+
+}
+
+
+static INLINE void
+pattern( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_MOV(ureg,
+ ureg_writemask(temp[0], TGSI_WRITEMASK_XY),
+ in[0]);
+ ureg_MOV(ureg,
+ ureg_writemask(temp[0], TGSI_WRITEMASK_Z),
+ ureg_scalar(constant[1], TGSI_SWIZZLE_Y));
+ ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0]));
+ ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0]));
+ ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0]));
+ ureg_RCP(ureg, temp[3], ureg_src(temp[3]));
+ ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3]));
+ ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3]));
+ ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_X), ureg_src(temp[1]));
+ ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_Y), ureg_src(temp[2]));
+ ureg_RCP(ureg, temp[0],
+ ureg_swizzle(constant[1],
+ TGSI_SWIZZLE_Z,
+ TGSI_SWIZZLE_W,
+ TGSI_SWIZZLE_Z,
+ TGSI_SWIZZLE_W));
+ ureg_MOV(ureg, temp[1], ureg_src(temp[4]));
+ ureg_MUL(ureg,
+ ureg_writemask(temp[1], TGSI_WRITEMASK_X),
+ ureg_src(temp[1]),
+ ureg_src(temp[0]));
+ ureg_MUL(ureg,
+ ureg_writemask(temp[1], TGSI_WRITEMASK_Y),
+ ureg_src(temp[1]),
+ ureg_src(temp[0]));
+ ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, ureg_src(temp[1]), sampler[0]);
+}
+
+static INLINE void
+mask( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[1]);
+ ureg_MUL(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_W),
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
+ ureg_MOV(ureg, *out, ureg_src(temp[0]));
+}
+
+static INLINE void
+image_normal( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, in[1], sampler[3]);
+}
+
+
+static INLINE void
+image_multiply( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]);
+ ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1]));
+}
+
+
+static INLINE void
+image_stencil( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]);
+ ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1]));
+}
+
+#define EXTENDED_BLENDER_OVER_FUNC \
+ ureg_SUB(ureg, temp[3], \
+ ureg_scalar(constant[1], TGSI_SWIZZLE_Y), \
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); \
+ ureg_SUB(ureg, temp[3], \
+ ureg_scalar(constant[1], TGSI_SWIZZLE_Y), \
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); \
+ ureg_MUL(ureg, temp[3], ureg_src(temp[0]), ureg_src(temp[3])); \
+ ureg_MUL(ureg, temp[4], ureg_src(temp[1]), ureg_src(temp[4])); \
+ ureg_ADD(ureg, temp[3], ureg_src(temp[3]), ureg_src(temp[4]));
+
+
+static INLINE void
+blend_multiply( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
+ EXTENDED_BLENDER_OVER_FUNC
+ ureg_MUL(ureg, temp[4], ureg_src(temp[0]), ureg_src(temp[1]));
+ ureg_ADD(ureg, temp[1], ureg_src(temp[4]), ureg_src(temp[3]));
+
+ ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
+ ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
+ ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W),
+ ureg_src(temp[3]), ureg_src(temp[2]));
+
+ ureg_MOV(ureg, *out, ureg_src(temp[1]));
+}
+
+static INLINE void
+blend_screen( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
+ ureg_ADD(ureg, temp[3], ureg_src(temp[0]), ureg_src(temp[1]));
+ ureg_MUL(ureg, temp[2], ureg_src(temp[0]), ureg_src(temp[1]));
+ ureg_SUB(ureg, *out, ureg_src(temp[3]), ureg_src(temp[2]));
+}
+
+static INLINE void
+blend_darken( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
+ EXTENDED_BLENDER_OVER_FUNC
+ ureg_MUL(ureg, temp[4], ureg_src(temp[0]),
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
+ ureg_MUL(ureg, temp[5], ureg_src(temp[1]),
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W));
+ ureg_MIN(ureg, temp[4], ureg_src(temp[4]), ureg_src(temp[5]));
+ ureg_ADD(ureg, temp[1], ureg_src(temp[3]), ureg_src(temp[4]));
+
+ ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
+ ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
+ ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W),
+ ureg_src(temp[3]), ureg_src(temp[2]));
+
+ ureg_MOV(ureg, *out, ureg_src(temp[1]));
+}
+
+static INLINE void
+blend_lighten( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
+ EXTENDED_BLENDER_OVER_FUNC
+ ureg_MUL(ureg, temp[4], ureg_src(temp[0]),
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
+ ureg_MUL(ureg, temp[5], ureg_src(temp[1]),
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W));
+ ureg_MAX(ureg, temp[4], ureg_src(temp[4]), ureg_src(temp[5]));
+ ureg_ADD(ureg, temp[1], ureg_src(temp[3]), ureg_src(temp[4]));
+
+ ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
+ ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
+ ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
+ ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W),
+ ureg_src(temp[3]), ureg_src(temp[2]));
+
+ ureg_MOV(ureg, *out, ureg_src(temp[1]));
+}
+
+static INLINE void
+premultiply( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_MUL(ureg,
+ ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ),
+ ureg_src(temp[0]),
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W));
+}
+
+static INLINE void
+unpremultiply( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_TEX(ureg, temp[0], TGSI_TEXTURE_2D, in[0], sampler[1]);
+}
+
+
+static INLINE void
+color_bw( struct ureg_program *ureg,
+ struct ureg_dst *out,
+ struct ureg_src *in,
+ struct ureg_src *sampler,
+ struct ureg_dst *temp,
+ struct ureg_src *constant)
+{
+ ureg_ADD(ureg, temp[1],
+ ureg_scalar(constant[1], TGSI_SWIZZLE_Y),
+ ureg_scalar(constant[1], TGSI_SWIZZLE_Y));
+ ureg_RCP(ureg, temp[2], ureg_src(temp[1]));
+ ureg_ADD(ureg, temp[1],
+ ureg_scalar(constant[1], TGSI_SWIZZLE_Y),
+ ureg_src(temp[2]));
+ ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X),
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X),
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Y));
+ ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X),
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Z),
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X));
+ ureg_SGE(ureg,
+ ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ),
+ ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_X),
+ ureg_src(temp[1]));
+ ureg_SGE(ureg,
+ ureg_writemask(temp[0], TGSI_WRITEMASK_W),
+ ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
+ ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_Y));
+ ureg_MOV(ureg, *out, ureg_src(temp[0]));
+}
struct shader_asm_info {
VGint id;
- VGint num_tokens;
- const char * txt;
+ ureg_func func;
VGboolean needs_position;
@@ -203,44 +412,44 @@ struct shader_asm_info {
static const struct shader_asm_info shaders_asm[] = {
/* fills */
- {VEGA_SOLID_FILL_SHADER, 40, solid_fill_asm,
+ {VEGA_SOLID_FILL_SHADER, solid_fill,
VG_FALSE, 0, 1, 0, 0, 0, 0},
- {VEGA_LINEAR_GRADIENT_SHADER, 200, linear_grad_asm,
+ {VEGA_LINEAR_GRADIENT_SHADER, linear_grad,
VG_TRUE, 0, 5, 0, 1, 0, 5},
- {VEGA_RADIAL_GRADIENT_SHADER, 200, radial_grad_asm,
+ {VEGA_RADIAL_GRADIENT_SHADER, radial_grad,
VG_TRUE, 0, 5, 0, 1, 0, 6},
- {VEGA_PATTERN_SHADER, 100, pattern_asm,
+ {VEGA_PATTERN_SHADER, pattern,
VG_TRUE, 1, 4, 0, 1, 0, 5},
/* image draw modes */
- {VEGA_IMAGE_NORMAL_SHADER, 200, image_normal_asm,
+ {VEGA_IMAGE_NORMAL_SHADER, image_normal,
VG_TRUE, 0, 0, 3, 1, 0, 0},
- {VEGA_IMAGE_MULTIPLY_SHADER, 200, image_multiply_asm,
+ {VEGA_IMAGE_MULTIPLY_SHADER, image_multiply,
VG_TRUE, 0, 0, 3, 1, 0, 2},
- {VEGA_IMAGE_STENCIL_SHADER, 200, image_stencil_asm,
+ {VEGA_IMAGE_STENCIL_SHADER, image_stencil,
VG_TRUE, 0, 0, 3, 1, 0, 2},
- {VEGA_MASK_SHADER, 100, mask_asm,
+ {VEGA_MASK_SHADER, mask,
VG_TRUE, 0, 0, 1, 1, 0, 2},
/* extra blend modes */
- {VEGA_BLEND_MULTIPLY_SHADER, 200, blend_multiply_asm,
+ {VEGA_BLEND_MULTIPLY_SHADER, blend_multiply,
VG_TRUE, 1, 1, 2, 1, 0, 5},
- {VEGA_BLEND_SCREEN_SHADER, 200, blend_screen_asm,
+ {VEGA_BLEND_SCREEN_SHADER, blend_screen,
VG_TRUE, 0, 0, 2, 1, 0, 4},
- {VEGA_BLEND_DARKEN_SHADER, 200, blend_darken_asm,
+ {VEGA_BLEND_DARKEN_SHADER, blend_darken,
VG_TRUE, 1, 1, 2, 1, 0, 6},
- {VEGA_BLEND_LIGHTEN_SHADER, 200, blend_lighten_asm,
+ {VEGA_BLEND_LIGHTEN_SHADER, blend_lighten,
VG_TRUE, 1, 1, 2, 1, 0, 6},
/* premultiply */
- {VEGA_PREMULTIPLY_SHADER, 100, premultiply_asm,
+ {VEGA_PREMULTIPLY_SHADER, premultiply,
VG_FALSE, 0, 0, 0, 0, 0, 1},
- {VEGA_UNPREMULTIPLY_SHADER, 100, unpremultiply_asm,
+ {VEGA_UNPREMULTIPLY_SHADER, unpremultiply,
VG_FALSE, 0, 0, 0, 0, 0, 1},
/* color transform to black and white */
- {VEGA_BW_SHADER, 150, color_bw_asm,
+ {VEGA_BW_SHADER, color_bw,
VG_FALSE, 1, 1, 0, 0, 0, 3},
};
#endif
diff --git a/src/gallium/state_trackers/vega/shaders_cache.c b/src/gallium/state_trackers/vega/shaders_cache.c
index 593e60fc63..f43fe6ee4c 100644
--- a/src/gallium/state_trackers/vega/shaders_cache.c
+++ b/src/gallium/state_trackers/vega/shaders_cache.c
@@ -123,17 +123,23 @@ static INLINE VGint range_max(VGint max, VGint current)
return MAX2(max, current);
}
-static void
-create_preamble(char *txt,
- const struct shader_asm_info *shaders[SHADER_STAGES],
- int num_shaders)
+static void *
+combine_shaders(const struct shader_asm_info *shaders[SHADER_STAGES], int num_shaders,
+ struct pipe_context *pipe,
+ struct pipe_shader_state *shader)
{
VGboolean declare_input = VG_FALSE;
VGint start_const = -1, end_const = 0;
VGint start_temp = -1, end_temp = 0;
VGint start_sampler = -1, end_sampler = 0;
- VGint i;
+ VGint i, current_shader = 0;
VGint num_consts, num_temps, num_samplers;
+ struct ureg_program *ureg;
+ struct ureg_src in[2];
+ struct ureg_src *sampler = NULL;
+ struct ureg_src *constant = NULL;
+ struct ureg_dst out, *temp = NULL;
+ void *p = NULL;
for (i = 0; i < num_shaders; ++i) {
if (shaders[i]->num_consts)
@@ -158,99 +164,94 @@ create_preamble(char *txt,
if (start_temp < 0)
start_temp = 0;
if (start_sampler < 0)
- start_sampler = 0;
+ start_sampler = 0;
num_consts = end_const - start_const;
num_temps = end_temp - start_temp;
num_samplers = end_sampler - start_sampler;
- /* end exclusive */
- --end_const;
- --end_temp;
- --end_sampler;
- sprintf(txt, "FRAG\n");
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!ureg)
+ return NULL;
if (declare_input) {
- sprintf(txt + strlen(txt), "DCL IN[0], POSITION, LINEAR\n");
- sprintf(txt + strlen(txt), "DCL IN[1], GENERIC[0], PERSPECTIVE\n");
+ in[0] = ureg_DECL_fs_input(ureg,
+ TGSI_SEMANTIC_POSITION,
+ 0,
+ TGSI_INTERPOLATE_LINEAR);
+ in[1] = ureg_DECL_fs_input(ureg,
+ TGSI_SEMANTIC_GENERIC,
+ 0,
+ TGSI_INTERPOLATE_PERSPECTIVE);
}
/* we always have a color output */
- sprintf(txt + strlen(txt), "DCL OUT[0], COLOR, CONSTANT\n");
-
- if (num_consts > 1)
- sprintf(txt + strlen(txt), "DCL CONST[%d..%d], CONSTANT\n", start_const, end_const);
- else if (num_consts == 1)
- sprintf(txt + strlen(txt), "DCL CONST[%d], CONSTANT\n", start_const);
-
- if (num_temps > 1)
- sprintf(txt + strlen(txt), "DCL TEMP[%d..%d], CONSTANT\n", start_temp, end_temp);
- else if (num_temps > 1)
- sprintf(txt + strlen(txt), "DCL TEMP[%d], CONSTANT\n", start_temp);
-
- if (num_samplers > 1)
- sprintf(txt + strlen(txt), "DCL SAMP[%d..%d], CONSTANT\n", start_sampler, end_sampler);
- else if (num_samplers == 1)
- sprintf(txt + strlen(txt), "DCL SAMP[%d], CONSTANT\n", start_sampler);
-}
+ out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
-static void *
-combine_shaders(const struct shader_asm_info *shaders[SHADER_STAGES], int num_shaders,
- struct pipe_context *pipe,
- struct pipe_shader_state *shader)
-{
- char *combined_txt;
- int combined_len = MAX_PREAMBLE;
- int combined_tokens = 0;
- int i = 0;
- int current_shader = 0;
- int current_len;
+ if (num_consts >= 1) {
+ constant = (struct ureg_src *) malloc(sizeof(struct ureg_src) * end_const);
+ for (i = start_const; i < end_const; i++) {
+ constant[i] = ureg_DECL_constant(ureg, i);
+ }
- for (i = 0; i < num_shaders; ++i) {
- combined_len += strlen(shaders[i]->txt);
- combined_tokens += shaders[i]->num_tokens;
}
- /* add for the %s->TEMP[0] substitutions */
- combined_len += num_shaders * 7 /*TEMP[0]*/ + 4 /*"END\n"*/;
- combined_txt = (char*)malloc(combined_len);
- combined_txt[0] = '\0';
+ if (num_temps >= 1) {
+ temp = (struct ureg_dst *) malloc(sizeof(struct ureg_dst) * end_temp);
+ for (i = start_temp; i < end_temp; i++) {
+ temp[i] = ureg_DECL_temporary(ureg);
+ }
+ }
- create_preamble(combined_txt, shaders, num_shaders);
+ if (num_samplers >= 1) {
+ sampler = (struct ureg_src *) malloc(sizeof(struct ureg_src) * end_sampler);
+ for (i = start_sampler; i < end_sampler; i++) {
+ sampler[i] = ureg_DECL_sampler(ureg, i);
+ }
+ }
while (current_shader < num_shaders) {
- const char temp[] = "TEMP[0]";
- const char out[] = "OUT[0]";
- const char *subst = temp;
-
- current_len = strlen(combined_txt);
-
- /* if the last shader then output */
- if (current_shader + 1 == num_shaders)
- subst = out;
-
- snprintf(combined_txt + current_len,
- combined_len - current_len,
- shaders[current_shader]->txt,
- subst);
- ++current_shader;
+ if ((current_shader + 1) == num_shaders) {
+ shaders[current_shader]->func(ureg,
+ &out,
+ in,
+ sampler,
+ temp,
+ constant);
+ } else {
+ shaders[current_shader]->func(ureg,
+ &temp[0],
+ in,
+ sampler,
+ temp,
+ constant);
+ }
+ current_shader++;
}
+ ureg_END(ureg);
- current_len = strlen(combined_txt);
- snprintf(combined_txt + current_len,
- combined_len - current_len,
- "END\n");
+ shader->tokens = ureg_finalize(ureg);
+ if(!shader->tokens)
+ return NULL;
- debug_printf("Combined shader is : \n%s\n",
- combined_txt);
+ p = pipe->create_fs_state(pipe, shader);
+ ureg_destroy(ureg);
- shader->tokens = tokens_from_assembly(
- combined_txt, combined_tokens);
+ if (num_temps >= 1) {
+ for (i = start_temp; i < end_temp; i++) {
+ ureg_release_temporary(ureg, temp[i]);
+ }
+ }
- free(combined_txt);
+ if (temp)
+ free(temp);
+ if (constant)
+ free(constant);
+ if (sampler)
+ free(sampler);
- return pipe->create_fs_state(pipe, shader);
+ return p;
}
static void *