summary refs log tree commit diff
path: root/src/gallium/auxiliary/gallivm
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.c 29
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_limits.h 53
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample.h 5
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 207
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 5
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 142
6 files changed, 301 insertions, 140 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 20ae958714..f372a48846 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1177,9 +1177,34 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
- /* FIXME: improve precision */
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ /*
+ * XXX: Added precision is not always necessary, so only enable this
+ * when we have a better system in place to track minimum precision.
+ */
+
+#if 0
+ /*
+ * Do one Newton-Raphson step to improve precision:
+ *
+ * res = x1 = (2 - a * rcp(a)) * rcp(a)
+ */
+
+ LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+ LLVMValueRef rcp_a;
+ LLVMValueRef res;
+
+ rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+
+ res = LLVMBuildMul(bld->builder, a, rcp_a, "");
+ res = LLVMBuildSub(bld->builder, two, res, "");
+ res = LLVMBuildMul(bld->builder, res, rcp_a, "");
+
+ return res; /* the refined value; returning rcp_a would discard the step above */
+#else
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+#endif
+ }
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
new file mode 100644
index 0000000000..e095a0abe3
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -0,0 +1,53 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_LIMITS_H_
+#define LP_BLD_LIMITS_H_
+
+/*
+ * TGSI translation limits.
+ *
+ * Some are slightly above SM 3.0 requirements to give some wiggle room to
+ * the state trackers.
+ */
+
+#define LP_MAX_TGSI_TEMPS 256 /* bound on TGSI_FILE_TEMPORARY register indices */
+
+#define LP_MAX_TGSI_ADDRS 16 /* bound on TGSI_FILE_ADDRESS register indices */
+
+#define LP_MAX_TGSI_IMMEDIATES 256 /* bound on the number of immediates per shader */
+
+/**
+ * Maximum control flow nesting (depth of the lp_exec_mask stacks).
+ *
+ * SM3.0 requires 24
+ */
+#define LP_MAX_TGSI_NESTING 32
+
+
+#endif /* LP_BLD_LIMITS_H_ */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 8ceb20473d..955b1d21ee 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -173,7 +173,10 @@ lp_build_sample_soa(LLVMBuilderRef builder,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- LLVMValueRef lodbias,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias,
+ LLVMValueRef explicit_lod,
LLVMValueRef *texel);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 74dc9e1d81..3f0ea05b79 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -30,6 +30,7 @@
* Texture sampling -- SoA.
*
* @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Brian Paul <brianp@vmware.com>
*/
#include "pipe/p_defines.h"
@@ -325,6 +326,18 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
bld->format_desc,
x, y, z, y_stride, z_stride);
+ if (use_border) {
+ /* If we can sample the border color, it means that texcoords may
+ * lie outside the bounds of the texture image. We need to do
+ * something to prevent reading out of bounds and causing a segfault.
+ *
+ * Simply AND the texel offset with ~use_border. This will cause the
+ * offsets of out-of-bounds texels to become zero. Offset zero is
+ * guaranteed to be inside the texture image.
+ */
+ offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
+ }
+
lp_build_fetch_rgba_soa(bld->builder,
bld->format_desc,
bld->texel_type,
@@ -891,17 +904,24 @@ lp_build_minify(struct lp_build_sample_context *bld,
* \param s vector of texcoord s values
* \param t vector of texcoord t values
* \param r vector of texcoord r values
- * \param shader_lod_bias vector float with the shader lod bias,
+ * \param lod_bias optional float vector with the shader lod bias
+ * \param explicit_lod optional float vector with the explicit lod
* \param width scalar int texture width
* \param height scalar int texture height
* \param depth scalar int texture depth
+ *
+ * XXX: The resulting lod is scalar, so ignore all but the first element of
+ * derivatives, lod_bias, etc that are passed by the shader.
*/
static LLVMValueRef
lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
- LLVMValueRef shader_lod_bias,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth)
@@ -914,7 +934,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
}
else {
- const int dims = texture_dims(bld->static_state->target);
struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
bld->static_state->lod_bias);
@@ -922,83 +941,76 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
bld->static_state->min_lod);
LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
bld->static_state->max_lod);
-
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
- LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
- LLVMValueRef s0, s1, s2;
- LLVMValueRef t0, t1, t2;
- LLVMValueRef r0, r1, r2;
- LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
- LLVMValueRef rho, lod;
-
- /*
- * dsdx = abs(s[1] - s[0]);
- * dsdy = abs(s[2] - s[0]);
- * dtdx = abs(t[1] - t[0]);
- * dtdy = abs(t[2] - t[0]);
- * drdx = abs(r[1] - r[0]);
- * drdy = abs(r[2] - r[0]);
- * XXX we're assuming a four-element quad in 2x2 layout here.
- */
- s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
- s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
- s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
- dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
- dsdx = lp_build_abs(float_bld, dsdx);
- dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
- dsdy = lp_build_abs(float_bld, dsdy);
- if (dims > 1) {
- t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
- t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
- t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
- dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
- dtdx = lp_build_abs(float_bld, dtdx);
- dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
- dtdy = lp_build_abs(float_bld, dtdy);
- if (dims > 2) {
- r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
- r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
- r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
- drdx = LLVMBuildSub(bld->builder, r1, r0, "");
- drdx = lp_build_abs(float_bld, drdx);
- drdy = LLVMBuildSub(bld->builder, r2, r0, "");
- drdy = lp_build_abs(float_bld, drdy);
- }
+ LLVMValueRef lod;
+
+ if (explicit_lod) {
+ lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
+ index0, "");
}
+ else {
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+ LLVMValueRef rho;
+
+ /*
+ * dsdx = abs(ddx[0][0]);
+ * dsdy = abs(ddy[0][0]);
+ * dtdx = abs(ddx[1][0]);
+ * dtdy = abs(ddy[1][0]);
+ * drdx = abs(ddx[2][0]);
+ * drdy = abs(ddy[2][0]);
+ */
+ dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
+ dsdx = lp_build_abs(float_bld, dsdx);
+ dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
+ dsdy = lp_build_abs(float_bld, dsdy);
+ if (dims > 1) {
+ dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
+ dtdx = lp_build_abs(float_bld, dtdx);
+ dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
+ dtdy = lp_build_abs(float_bld, dtdy);
+ if (dims > 2) {
+ drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
+ drdx = lp_build_abs(float_bld, drdx);
+ drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
+ drdy = lp_build_abs(float_bld, drdy);
+ }
+ }
- /* Compute rho = max of all partial derivatives scaled by texture size.
- * XXX this could be vectorized somewhat
- */
- rho = LLVMBuildMul(bld->builder,
- lp_build_max(float_bld, dsdx, dsdy),
- lp_build_int_to_float(float_bld, width), "");
- if (dims > 1) {
- LLVMValueRef max;
- max = LLVMBuildMul(bld->builder,
- lp_build_max(float_bld, dtdx, dtdy),
- lp_build_int_to_float(float_bld, height), "");
- rho = lp_build_max(float_bld, rho, max);
- if (dims > 2) {
+ /* Compute rho = max of all partial derivatives scaled by texture size.
+ * XXX this could be vectorized somewhat
+ */
+ rho = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, dsdx, dsdy),
+ lp_build_int_to_float(float_bld, width), "");
+ if (dims > 1) {
+ LLVMValueRef max;
max = LLVMBuildMul(bld->builder,
- lp_build_max(float_bld, drdx, drdy),
- lp_build_int_to_float(float_bld, depth), "");
+ lp_build_max(float_bld, dtdx, dtdy),
+ lp_build_int_to_float(float_bld, height), "");
rho = lp_build_max(float_bld, rho, max);
+ if (dims > 2) {
+ max = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, drdx, drdy),
+ lp_build_int_to_float(float_bld, depth), "");
+ rho = lp_build_max(float_bld, rho, max);
+ }
}
- }
- /* compute lod = log2(rho) */
- lod = lp_build_log2(float_bld, rho);
+ /* compute lod = log2(rho) */
+ lod = lp_build_log2(float_bld, rho);
- /* add sampler lod bias */
- lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias");
+ /* add shader lod bias */
+ if (lod_bias) {
+ lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
+ index0, "");
+ lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+ }
+ }
- /* add shader lod bias */
- /* XXX for now we take only the first element since our lod is scalar */
- shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias,
- LLVMConstInt(LLVMInt32Type(), 0, 0), "");
- lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias");
+ /* add sampler lod bias */
+ lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
/* clamp lod */
lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
@@ -1584,7 +1596,10 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
- LLVMValueRef lodbias,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth,
@@ -1622,7 +1637,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, s, t, r, lodbias, width, height, depth);
+ lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy,
+ lod_bias, explicit_lod,
+ width, height, depth);
}
/*
@@ -2049,6 +2066,24 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
/**
+ * Just set texels to white instead of actually sampling the texture.
+ * For debugging.
+ */
+static void
+lp_build_sample_nop(struct lp_build_sample_context *bld,
+                    LLVMValueRef *texel)
+{
+   unsigned remaining = 4;
+
+   /* Visit all four channels and store the texel build context's
+    * 'one' constant in each of them. */
+   while (remaining--) {
+      texel[remaining] = bld->texel_bld.one;
+   }
+}
+
+
+/**
* Build texture sampling code.
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
@@ -2062,7 +2097,10 @@ lp_build_sample_soa(LLVMBuilderRef builder,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- LLVMValueRef lodbias,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
LLVMValueRef *texel)
{
struct lp_build_sample_context bld;
@@ -2113,19 +2151,24 @@ lp_build_sample_soa(LLVMBuilderRef builder,
height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
- if (util_format_is_rgba8_variant(bld.format_desc) &&
- static_state->target == PIPE_TEXTURE_2D &&
- static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
- is_simple_wrap_mode(static_state->wrap_s) &&
- is_simple_wrap_mode(static_state->wrap_t)) {
+ if (0) {
+ /* For debug: no-op texture sampling */
+ lp_build_sample_nop(&bld, texel);
+ }
+ else if (util_format_is_rgba8_variant(bld.format_desc) &&
+ static_state->target == PIPE_TEXTURE_2D &&
+ static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
+ static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
+ static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
+ is_simple_wrap_mode(static_state->wrap_s) &&
+ is_simple_wrap_mode(static_state->wrap_t)) {
/* special case */
lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
row_stride_array, data_array, texel);
}
else {
- lp_build_sample_general(&bld, unit, s, t, r, lodbias,
+ lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
+ lod_bias, explicit_lod,
width, height, depth,
width_vec, height_vec, depth_vec,
row_stride_array, img_stride_array,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 2eac5da6c6..5ce1385c48 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -65,7 +65,10 @@ struct lp_build_sampler_soa
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- LLVMValueRef lodbias,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
LLVMValueRef *texel);
};
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index d3c769e28b..0b1a28a7ab 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -54,13 +54,10 @@
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_tgsi.h"
+#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
-#define LP_MAX_TEMPS 256
-#define LP_MAX_IMMEDIATES 256
-
-
#define FOR_EACH_CHANNEL( CHAN )\
for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
@@ -84,7 +81,6 @@
#define QUAD_BOTTOM_LEFT 2
#define QUAD_BOTTOM_RIGHT 3
-#define LP_TGSI_MAX_NESTING 16
struct lp_exec_mask {
struct lp_build_context *bld;
@@ -93,19 +89,19 @@ struct lp_exec_mask {
LLVMTypeRef int_vec_type;
- LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
+ LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
int cond_stack_size;
LLVMValueRef cond_mask;
- LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
+ LLVMValueRef break_stack[LP_MAX_TGSI_NESTING];
int break_stack_size;
LLVMValueRef break_mask;
- LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
+ LLVMValueRef cont_stack[LP_MAX_TGSI_NESTING];
int cont_stack_size;
LLVMValueRef cont_mask;
- LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
+ LLVMBasicBlockRef loop_stack[LP_MAX_TGSI_NESTING];
int loop_stack_size;
LLVMBasicBlockRef loop_block;
@@ -124,9 +120,9 @@ struct lp_build_tgsi_soa_context
struct lp_build_sampler_soa *sampler;
- LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
- LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
- LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS];
+ LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
+ LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
+ LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
/* we allocate an array of temps if we have indirect
* addressing and then the temps above is unused */
@@ -198,6 +194,7 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask)
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
LLVMValueRef val)
{
+ assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
mask->int_vec_type, "");
@@ -239,6 +236,10 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask)
if (mask->cond_stack_size == 0)
mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
+ assert(mask->break_stack_size < LP_MAX_TGSI_NESTING);
+ assert(mask->cont_stack_size < LP_MAX_TGSI_NESTING);
+ assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
+ /* one bounds check above for each of the three stacks pushed below */
mask->break_stack[mask->break_stack_size++] = mask->break_mask;
mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
@@ -255,16 +256,9 @@ static void lp_exec_break(struct lp_exec_mask *mask)
mask->exec_mask,
"break");
- /* mask->break_stack_size > 1 implies that we encountered a break
- * statemant already and if that's the case we want to make sure
- * our mask is a combination of the previous break and the current
- * execution mask */
- if (mask->break_stack_size > 1) {
- mask->break_mask = LLVMBuildAnd(mask->bld->builder,
- mask->break_mask,
- exec_mask, "break_full");
- } else
- mask->break_mask = exec_mask;
+ mask->break_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->break_mask,
+ exec_mask, "break_full");
lp_exec_mask_update(mask);
}
@@ -275,12 +269,9 @@ static void lp_exec_continue(struct lp_exec_mask *mask)
mask->exec_mask,
"");
- if (mask->cont_stack_size > 1) {
- mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
- mask->cont_mask,
- exec_mask, "");
- } else
- mask->cont_mask = exec_mask;
+ mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cont_mask,
+ exec_mask, "");
lp_exec_mask_update(mask);
}
@@ -589,7 +580,6 @@ emit_store(
case TGSI_FILE_PREDICATE:
/* FIXME */
- assert(0);
break;
default:
@@ -602,21 +592,37 @@ emit_store(
* High-level instruction translators.
*/
+enum tex_modifier { /* tells emit_tex() how to treat the instruction's operands */
+ TEX_MODIFIER_NONE = 0, /* TGSI_OPCODE_TEX: no lod bias, no explicit lod */
+ TEX_MODIFIER_PROJECTED, /* TGSI_OPCODE_TXP: coords multiplied by rcp of emit_fetch(bld, inst, 0, 3) */
+ TEX_MODIFIER_LOD_BIAS, /* TGSI_OPCODE_TXB: lod_bias = emit_fetch(bld, inst, 0, 3) */
+ TEX_MODIFIER_EXPLICIT_LOD, /* TGSI_OPCODE_TXL: explicit_lod = emit_fetch(bld, inst, 0, 3) */
+ TEX_MODIFIER_EXPLICIT_DERIV /* TGSI_OPCODE_TXD: ddx/ddy fetched from sources 1 and 2, unit from Src[3] */
+};
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
- boolean apply_lodbias,
- boolean projected,
+ enum tex_modifier modifier,
LLVMValueRef *texel)
{
- const uint unit = inst->Src[1].Register.Index;
- LLVMValueRef lodbias;
+ unsigned unit;
+ LLVMValueRef lod_bias, explicit_lod;
LLVMValueRef oow = NULL;
LLVMValueRef coords[3];
+ LLVMValueRef ddx[3];
+ LLVMValueRef ddy[3];
unsigned num_coords;
unsigned i;
+ if (!bld->sampler) {
+ _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
+ for (i = 0; i < 4; i++) {
+ texel[i] = bld->base.undef;
+ }
+ return;
+ }
+
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_1D:
num_coords = 1;
@@ -637,29 +643,57 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
return;
}
- if(apply_lodbias)
- lodbias = emit_fetch( bld, inst, 0, 3 );
- else
- lodbias = bld->base.zero;
+ if (modifier == TEX_MODIFIER_LOD_BIAS) {
+ lod_bias = emit_fetch( bld, inst, 0, 3 );
+ explicit_lod = NULL;
+ }
+ else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
+ lod_bias = NULL;
+ explicit_lod = emit_fetch( bld, inst, 0, 3 );
+ }
+ else {
+ lod_bias = NULL;
+ explicit_lod = NULL;
+ }
- if (projected) {
+ if (modifier == TEX_MODIFIER_PROJECTED) {
oow = emit_fetch( bld, inst, 0, 3 );
oow = lp_build_rcp(&bld->base, oow);
}
for (i = 0; i < num_coords; i++) {
coords[i] = emit_fetch( bld, inst, 0, i );
- if (projected)
+ if (modifier == TEX_MODIFIER_PROJECTED)
coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
for (i = num_coords; i < 3; i++) {
coords[i] = bld->base.undef;
}
+ if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
+ for (i = 0; i < num_coords; i++) {
+ ddx[i] = emit_fetch( bld, inst, 1, i );
+ ddy[i] = emit_fetch( bld, inst, 2, i );
+ }
+ unit = inst->Src[3].Register.Index;
+ } else {
+ for (i = 0; i < num_coords; i++) {
+ ddx[i] = emit_ddx( bld, coords[i] );
+ ddy[i] = emit_ddy( bld, coords[i] );
+ }
+ unit = inst->Src[1].Register.Index;
+ }
+ for (i = num_coords; i < 3; i++) {
+ ddx[i] = bld->base.undef;
+ ddy[i] = bld->base.undef;
+ }
+
bld->sampler->emit_fetch_texel(bld->sampler,
bld->base.builder,
bld->base.type,
- unit, num_coords, coords, lodbias,
+ unit, num_coords, coords,
+ ddx, ddy,
+ lod_bias, explicit_lod,
texel);
}
@@ -739,7 +773,7 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
lp_build_mask_update(bld->mask, mask);
}
-static int
+static void
emit_declaration(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_declaration *decl)
@@ -753,6 +787,7 @@ emit_declaration(
for (idx = first; idx <= last; ++idx) {
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
+ assert(idx < LP_MAX_TGSI_TEMPS);
if (bld->has_indirect_addressing) {
LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
last*4 + 4, 0);
@@ -772,18 +807,21 @@ emit_declaration(
break;
case TGSI_FILE_ADDRESS:
+ assert(idx < LP_MAX_TGSI_ADDRS);
for (i = 0; i < NUM_CHANNELS; i++)
bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
vec_type, "");
break;
+ case TGSI_FILE_PREDICATE:
+ _debug_printf("warning: predicate registers not yet implemented\n");
+ break;
+
default:
/* don't need to declare other vars */
break;
}
}
-
- return TRUE;
}
@@ -1359,12 +1397,11 @@ emit_instruction(
break;
case TGSI_OPCODE_TEX:
- emit_tex( bld, inst, FALSE, FALSE, dst0 );
+ emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
break;
case TGSI_OPCODE_TXD:
- /* FIXME */
- return FALSE;
+ emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
break;
case TGSI_OPCODE_UP2H:
@@ -1466,7 +1503,7 @@ emit_instruction(
break;
case TGSI_OPCODE_TXB:
- emit_tex( bld, inst, TRUE, FALSE, dst0 );
+ emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
break;
case TGSI_OPCODE_NRM:
@@ -1571,11 +1608,11 @@ emit_instruction(
break;
case TGSI_OPCODE_TXL:
- emit_tex( bld, inst, TRUE, FALSE, dst0 );
+ emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
break;
case TGSI_OPCODE_TXP:
- emit_tex( bld, inst, FALSE, TRUE, dst0 );
+ emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
break;
case TGSI_OPCODE_BRK:
@@ -1765,10 +1802,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
/* Inputs already interpolated */
- {
- if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
- _debug_printf("warning: failed to define LLVM variable\n");
- }
+ emit_declaration( &bld, &parse.FullToken.FullDeclaration );
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
@@ -1787,7 +1821,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
{
const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
assert(size <= 4);
- assert(num_immediates < LP_MAX_IMMEDIATES);
+ assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
for( i = 0; i < size; ++i )
bld.immediates[num_immediates][i] =
lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);