summary refs log tree commit diff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/Makefile1
-rw-r--r--src/gallium/auxiliary/SConscript1
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h13
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c17
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.c15
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.h1
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c14
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_soa.c6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c1
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c36
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.h4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c12
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_struct.c85
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_struct.h41
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.c26
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h35
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c1176
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c46
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer.h6
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c8
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c3
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c6
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c8
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c7
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c6
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c6
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h30
-rw-r--r--src/gallium/auxiliary/util/u_caps.c28
-rw-r--r--src/gallium/auxiliary/util/u_caps.h4
-rw-r--r--src/gallium/auxiliary/util/u_format.c56
-rw-r--r--src/gallium/auxiliary/util/u_format.h42
-rw-r--r--src/gallium/auxiliary/util/u_inlines.h1
-rw-r--r--src/gallium/auxiliary/util/u_linear.h1
39 files changed, 1654 insertions, 106 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index eb86d83d2a..5388f4ecd5 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -171,6 +171,7 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_sample_soa.c \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
+ gallivm/lp_bld_tgsi_aos.c \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 6210ada990..ba8be2efd1 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -222,6 +222,7 @@ if env['llvm']:
'gallivm/lp_bld_sample_soa.c',
'gallivm/lp_bld_struct.c',
'gallivm/lp_bld_swizzle.c',
+ 'gallivm/lp_bld_tgsi_aos.c',
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 4c780e4dcb..4f0d30123a 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -39,6 +39,7 @@
#include "pipe/p_state.h"
+#include "tgsi/tgsi_exec.h"
struct pipe_context;
struct draw_context;
@@ -225,4 +226,16 @@ boolean draw_need_pipeline(const struct draw_context *draw,
const struct pipe_rasterizer_state *rasterizer,
unsigned prim );
+/**
+ * Report a shader capability for the given shader stage.
+ *
+ * Vertex and geometry shaders forward the query to
+ * tgsi_exec_get_shader_param(); every other stage reports 0.
+ */
+static INLINE int
+draw_get_shader_param(unsigned shader, enum pipe_cap param)
+{
+   if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
+      return tgsi_exec_get_shader_param(param);
+   }
+
+   return 0;
+}
+
#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index c0135f5bb7..eac21110be 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -373,8 +373,7 @@ generate_aaline_fs(struct aaline_stage *aaline)
aaline->fs->sampler_unit = transform.freeSampler;
- aaline->fs->aaline_fs
- = aaline->driver_create_fs_state(pipe, &aaline_fs);
+ aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs);
if (aaline->fs->aaline_fs == NULL)
goto fail;
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 1911242f82..68e8295b5e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -32,6 +32,8 @@
#define DRAW_VS_AOS_H
#include "pipe/p_config.h"
+#include "tgsi/tgsi_exec.h"
+#include "draw_vs.h"
#ifdef PIPE_ARCH_X86
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index e0d30be98d..dce3c3745b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -56,6 +56,7 @@
#include "lp_bld_intr.h"
#include "lp_bld_logic.h"
#include "lp_bld_pack.h"
+#include "lp_bld_debug.h"
#include "lp_bld_arit.h"
@@ -1850,9 +1851,11 @@ lp_build_pow(struct lp_build_context *bld,
LLVMValueRef y)
{
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x) && LLVMIsConstant(y))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x) && LLVMIsConstant(y)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y));
}
@@ -1907,9 +1910,11 @@ lp_build_polynomial(struct lp_build_context *bld,
assert(lp_check_value(bld->type, x));
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
for (i = num_coeffs; i--; ) {
LLVMValueRef coeff;
@@ -1981,9 +1986,11 @@ lp_build_exp2_approx(struct lp_build_context *bld,
if(p_exp2_int_part || p_frac_part || p_exp2) {
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
assert(type.floating && type.width == 32);
@@ -2096,9 +2103,11 @@ lp_build_log2_approx(struct lp_build_context *bld,
if(p_exp || p_floor_log2 || p_log2) {
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
assert(type.floating && type.width == 32);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
index e42ff31ac7..dd839c0bea 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -382,9 +382,12 @@ lp_build_const_aos(struct lp_type type,
}
+/**
+ * @param mask TGSI_WRITEMASK_xxx
+ */
LLVMValueRef
lp_build_const_mask_aos(struct lp_type type,
- const boolean cond[4])
+ unsigned mask)
{
LLVMTypeRef elem_type = LLVMIntType(type.width);
LLVMValueRef masks[LP_MAX_VECTOR_LENGTH];
@@ -392,9 +395,13 @@ lp_build_const_mask_aos(struct lp_type type,
assert(type.length <= LP_MAX_VECTOR_LENGTH);
- for(j = 0; j < type.length; j += 4)
- for(i = 0; i < 4; ++i)
- masks[j + i] = LLVMConstInt(elem_type, cond[i] ? ~0 : 0, 0);
+ for (j = 0; j < type.length; j += 4) {
+ for( i = 0; i < 4; ++i) {
+ masks[j + i] = LLVMConstInt(elem_type,
+ mask & (1 << i) ? ~0ULL : 0,
+ 1);
+ }
+ }
return LLVMConstVector(masks, type.length);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
index 7ee8fff140..6b1fc590c1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -104,7 +104,7 @@ lp_build_const_aos(struct lp_type type,
LLVMValueRef
lp_build_const_mask_aos(struct lp_type type,
- const boolean cond[4]);
+ unsigned mask);
static INLINE LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index 858002b34f..369c1bbf09 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -40,6 +40,7 @@
#define GALLIVM_DEBUG_IR 0x2
#define GALLIVM_DEBUG_ASM 0x4
#define GALLIVM_DEBUG_NO_OPT 0x8
+#define GALLIVM_DEBUG_PERF 0x10
#ifdef DEBUG
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 92123e09d3..6b9189e1da 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -46,6 +46,7 @@
#include "lp_bld_conv.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_gather.h"
+#include "lp_bld_debug.h"
#include "lp_bld_format.h"
@@ -449,6 +450,11 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
* integer conversions.
*/
+ if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
+ debug_printf("%s: unpacking %s with floating point\n",
+ __FUNCTION__, format_desc->short_name);
+ }
+
lp_build_conv(builder,
lp_float32_vec4_type(),
type,
@@ -513,6 +519,10 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
format_desc->short_name);
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
+ }
+
/*
* Declare and bind format_desc->fetch_rgba_8unorm().
*/
@@ -612,6 +622,10 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
format_desc->short_name);
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
+ }
+
/*
* Declare and bind format_desc->fetch_rgba_float().
*/
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index c724a4453e..ce7e54afc7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -37,6 +37,7 @@
#include "lp_bld_conv.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_gather.h"
+#include "lp_bld_debug.h"
#include "lp_bld_format.h"
@@ -387,6 +388,11 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
unsigned k, chan;
struct lp_type tmp_type;
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: scalar unpacking of %s\n",
+ __FUNCTION__, format_desc->short_name);
+ }
+
tmp_type = type;
tmp_type.length = 4;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 60d8bcfa55..761f33b578 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -43,6 +43,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
{ "ir", GALLIVM_DEBUG_IR, NULL },
{ "asm", GALLIVM_DEBUG_ASM, NULL },
{ "nopt", GALLIVM_DEBUG_NO_OPT, NULL },
+ { "perf", GALLIVM_DEBUG_PERF, NULL },
DEBUG_NAMED_VALUE_END
};
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 7d7db3b0d9..a959bd4ad4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -40,6 +40,7 @@
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
+#include "lp_bld_debug.h"
#include "lp_bld_logic.h"
@@ -325,8 +326,10 @@ lp_build_compare(LLVMBuilderRef builder,
res = LLVMGetUndef(int_vec_type);
- debug_printf("%s: warning: using slow element-wise int"
- " vector comparison\n", __FUNCTION__);
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: using slow element-wise int"
+ " vector comparison\n", __FUNCTION__);
+ }
for(i = 0; i < type.length; ++i) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
@@ -482,24 +485,30 @@ lp_build_select(struct lp_build_context *bld,
}
+/**
+ * Return mask ? a : b;
+ *
+ * mask is a TGSI_WRITEMASK_xxx.
+ */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
+ unsigned mask,
LLVMValueRef a,
- LLVMValueRef b,
- const boolean cond[4])
+ LLVMValueRef b)
{
const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
+ assert((mask & ~0xf) == 0);
assert(lp_check_value(type, a));
assert(lp_check_value(type, b));
if(a == b)
return a;
- if(cond[0] && cond[1] && cond[2] && cond[3])
+ if((mask & 0xf) == 0xf)
return a;
- if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
+ if((mask & 0xf) == 0x0)
return b;
if(a == bld->undef || b == bld->undef)
return bld->undef;
@@ -522,7 +531,9 @@ lp_build_select_aos(struct lp_build_context *bld,
for(j = 0; j < n; j += 4)
for(i = 0; i < 4; ++i)
- shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
+ shuffles[j + i] = LLVMConstInt(elem_type,
+ (mask & (1 << i) ? 0 : n) + j + i,
+ 0);
return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
}
@@ -531,16 +542,17 @@ lp_build_select_aos(struct lp_build_context *bld,
/* XXX: Unfortunately select of vectors do not work */
/* Use a select */
LLVMTypeRef elem_type = LLVMInt1Type();
- LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH];
for(j = 0; j < n; j += 4)
for(i = 0; i < 4; ++i)
- cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
+ cond_vec[j + i] = LLVMConstInt(elem_type,
+ mask & (1 << i) ? 1 : 0, 0);
- return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
+ return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, "");
#else
- LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
- return lp_build_select(bld, mask, a, b);
+ LLVMValueRef mask_vec = lp_build_const_mask_aos(type, mask);
+ return lp_build_select(bld, mask_vec, a, b);
#endif
}
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index 4e7b4c9938..111daad971 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -77,9 +77,9 @@ lp_build_select(struct lp_build_context *bld,
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
+ unsigned mask,
LLVMValueRef a,
- LLVMValueRef b,
- const boolean cond[4]);
+ LLVMValueRef b);
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index b7b630f2e8..f7eb7148ab 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -111,8 +111,6 @@ lp_build_const_pack_shuffle(unsigned n)
assert(n <= LP_MAX_VECTOR_LENGTH);
- /* TODO: cache results in a static table */
-
for(i = 0; i < n; ++i)
elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 1f39d9c98b..baf0402f56 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -2190,9 +2190,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
lp_build_sample_nop(&bld, texel_out);
}
else if (util_format_fits_8unorm(bld.format_desc) &&
- bld.format_desc->nr_channels > 1 &&
(static_state->target == PIPE_TEXTURE_2D ||
- static_state->target == PIPE_TEXTURE_RECT) &&
+ static_state->target == PIPE_TEXTURE_RECT) &&
static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
@@ -2203,6 +2202,15 @@ lp_build_sample_soa(LLVMBuilderRef builder,
row_stride_array, data_array, texel_out);
}
else {
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ (static_state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
+ static_state->mag_img_filter != PIPE_TEX_FILTER_NEAREST ||
+ static_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) &&
+ util_format_fits_8unorm(bld.format_desc)) {
+ debug_printf("%s: using floating point linear filtering for %s\n",
+ __FUNCTION__, bld.format_desc->short_name);
+ }
+
lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.c b/src/gallium/auxiliary/gallivm/lp_bld_struct.c
index 3998ac374f..4693c2de6f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_struct.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.c
@@ -49,6 +49,8 @@ lp_build_struct_get_ptr(LLVMBuilderRef builder,
{
LLVMValueRef indices[2];
LLVMValueRef member_ptr;
+ assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+ assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind);
indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
@@ -65,8 +67,91 @@ lp_build_struct_get(LLVMBuilderRef builder,
{
LLVMValueRef member_ptr;
LLVMValueRef res;
+ assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+ assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind);
member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name);
res = LLVMBuildLoad(builder, member_ptr, "");
lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name);
return res;
}
+
+
+/**
+ * Build a pointer to one element of an array.
+ *
+ * ptr must be a pointer to an LLVM array type (asserted); index selects
+ * the element inside that array.
+ */
+LLVMValueRef
+lp_build_array_get_ptr(LLVMBuilderRef builder,
+                       LLVMValueRef ptr,
+                       LLVMValueRef index)
+{
+   LLVMValueRef gep_idx[2];
+   LLVMValueRef elem_ptr;
+
+   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+   assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind);
+
+   /* First index steps through the pointer, second picks the element. */
+   gep_idx[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   gep_idx[1] = index;
+
+   elem_ptr = LLVMBuildGEP(builder, ptr, gep_idx, Elements(gep_idx), "");
+#ifdef DEBUG
+   lp_build_name(elem_ptr, "&%s[%s]",
+                 LLVMGetValueName(ptr), LLVMGetValueName(index));
+#endif
+   return elem_ptr;
+}
+
+
+/**
+ * Load one element of an array.
+ *
+ * ptr must be a pointer to an LLVM array type (asserted). The element
+ * address comes from lp_build_array_get_ptr() and is then loaded.
+ */
+LLVMValueRef
+lp_build_array_get(LLVMBuilderRef builder,
+                   LLVMValueRef ptr,
+                   LLVMValueRef index)
+{
+   LLVMValueRef elem_ptr;
+   LLVMValueRef elem;
+
+   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+   assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind);
+
+   elem_ptr = lp_build_array_get_ptr(builder, ptr, index);
+   elem = LLVMBuildLoad(builder, elem_ptr, "");
+#ifdef DEBUG
+   lp_build_name(elem, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
+#endif
+   return elem;
+}
+
+
+/**
+ * Store a value into one element of an array.
+ *
+ * ptr must be a pointer to an LLVM array type (asserted). The element
+ * address comes from lp_build_array_get_ptr().
+ */
+void
+lp_build_array_set(LLVMBuilderRef builder,
+                   LLVMValueRef ptr,
+                   LLVMValueRef index,
+                   LLVMValueRef value)
+{
+   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+   assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind);
+
+   LLVMBuildStore(builder, value,
+                  lp_build_array_get_ptr(builder, ptr, index));
+}
+
+
+/**
+ * Load the index-th element addressed through a pointer.
+ *
+ * Unlike lp_build_array_get(), ptr only has to be a pointer (asserted);
+ * a single-index GEP is used to reach the element.
+ */
+LLVMValueRef
+lp_build_pointer_get(LLVMBuilderRef builder,
+                     LLVMValueRef ptr,
+                     LLVMValueRef index)
+{
+   LLVMValueRef elem_ptr;
+   LLVMValueRef elem;
+
+   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+
+   elem_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
+   elem = LLVMBuildLoad(builder, elem_ptr, "");
+#ifdef DEBUG
+   lp_build_name(elem, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
+#endif
+   return elem;
+}
+
+
+/**
+ * Store a value into the index-th element addressed through a pointer.
+ *
+ * ptr must be a pointer (asserted, matching lp_build_pointer_get()); a
+ * single-index GEP is used to reach the element, then value is stored there.
+ */
+void
+lp_build_pointer_set(LLVMBuilderRef builder,
+                     LLVMValueRef ptr,
+                     LLVMValueRef index,
+                     LLVMValueRef value)
+{
+   LLVMValueRef element_ptr;
+   /* Same sanity check as the getter, so misuse is caught in debug builds. */
+   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+   element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
+   LLVMBuildStore(builder, value, element_ptr);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
index 147336edb4..eb87a8eee9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
@@ -71,5 +71,46 @@ lp_build_struct_get(LLVMBuilderRef builder,
unsigned member,
const char *name);
+/**
+ * Get value pointer to an array element.
+ */
+LLVMValueRef
+lp_build_array_get_ptr(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index);
+
+/**
+ * Get the value of an array element.
+ */
+LLVMValueRef
+lp_build_array_get(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index);
+
+/**
+ * Set the value of an array element.
+ */
+void
+lp_build_array_set(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index,
+ LLVMValueRef value);
+
+/**
+ * Get the value of the index-th element addressed through a pointer.
+ */
+LLVMValueRef
+lp_build_pointer_get(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index);
+
+/**
+ * Set the value of the index-th element addressed through a pointer.
+ */
+void
+lp_build_pointer_set(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ LLVMValueRef index,
+ LLVMValueRef value);
#endif /* !LP_BLD_STRUCT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 20cf96ca66..d4d2f2b307 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -95,10 +95,13 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
}
+/**
+ * Swizzle one channel into all other three channels.
+ */
LLVMValueRef
-lp_build_broadcast_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- unsigned channel)
+lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
+ LLVMValueRef a,
+ unsigned channel)
{
const struct lp_type type = bld->type;
const unsigned n = type.length;
@@ -139,13 +142,10 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
{ 1, -2},
{-1, -2}
};
- boolean cond[4];
unsigned i;
- memset(cond, 0, sizeof cond);
- cond[channel] = 1;
-
- a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
+ a = LLVMBuildAnd(bld->builder, a,
+ lp_build_const_mask_aos(type, 1 << channel), "");
/*
* Build a type where each element is an integer that cover the four
@@ -206,7 +206,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
case PIPE_SWIZZLE_GREEN:
case PIPE_SWIZZLE_BLUE:
case PIPE_SWIZZLE_ALPHA:
- return lp_build_broadcast_aos(bld, a, swizzles[0]);
+ return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]);
case PIPE_SWIZZLE_ZERO:
return bld->zero;
case PIPE_SWIZZLE_ONE:
@@ -282,7 +282,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
*/
LLVMValueRef res;
struct lp_type type4;
- boolean cond[4];
+ unsigned cond = 0;
unsigned chan;
int shift;
@@ -290,9 +290,11 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
* Start with a mixture of 1 and 0.
*/
for (chan = 0; chan < 4; ++chan) {
- cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE;
+ if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
+ cond |= 1 << chan;
+ }
}
- res = lp_build_select_aos(bld, bld->one, bld->zero, cond);
+ res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
/*
* Build a type where each element is an integer that cover the four
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
index 315e1bcb54..f9b6a5e725 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -60,7 +60,7 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
* all four channel.
*/
LLVMValueRef
-lp_build_broadcast_aos(struct lp_build_context *bld,
+lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
LLVMValueRef a,
unsigned channel);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 4d415b6d41..97318b3456 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -45,6 +45,15 @@ struct lp_build_context;
struct lp_build_mask_context;
+/**
+ * Texture fetch modifier, passed as the last argument of
+ * lp_build_sampler_aos::emit_fetch_texel.
+ *
+ * NOTE(review): going by the enumerator names these select a projective
+ * fetch, an LOD bias, an explicit LOD and explicit derivatives -- confirm
+ * against the code that consumes them.
+ */
+enum lp_build_tex_modifier {
+ LP_BLD_TEX_MODIFIER_NONE = 0,
+ LP_BLD_TEX_MODIFIER_PROJECTED,
+ LP_BLD_TEX_MODIFIER_LOD_BIAS,
+ LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
+ LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
+};
+
+
/**
* Sampler code generation interface.
*
@@ -73,6 +82,20 @@ struct lp_build_sampler_soa
};
+/**
+ * Sampler code generation interface for the AoS translator.
+ *
+ * It holds a single callback, emit_fetch_texel, which takes the texture
+ * target and unit, the coordinates, the two derivative values and a
+ * modifier, and returns an LLVM value.
+ * NOTE(review): the layout of the returned value is not visible here --
+ * presumably one AoS vector of texel channels; confirm with implementers.
+ */
+struct lp_build_sampler_aos
+{
+ LLVMValueRef
+ (*emit_fetch_texel)( struct lp_build_sampler_aos *sampler,
+ struct lp_build_context *bld,
+ unsigned target, /* TGSI_TEXTURE_* */
+ unsigned unit,
+ LLVMValueRef coords,
+ LLVMValueRef ddx,
+ LLVMValueRef ddy,
+ enum lp_build_tex_modifier modifier);
+};
+
+
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
@@ -86,4 +109,16 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_shader_info *info);
+/**
+ * Translate TGSI tokens to LLVM IR using an AoS register layout.
+ *
+ * AoS counterpart of lp_build_tgsi_soa(). swizzles gives the channel
+ * ordering of the AoS vectors (index 0 = red ... index 3 = alpha).
+ * NOTE(review): inputs/outputs are presumably indexed by TGSI register
+ * index -- confirm against the implementation.
+ */
+void
+lp_build_tgsi_aos(LLVMBuilderRef builder,
+ const struct tgsi_token *tokens,
+ struct lp_type type,
+ const unsigned char swizzles[4],
+ LLVMValueRef consts_ptr,
+ const LLVMValueRef *inputs,
+ LLVMValueRef *outputs,
+ struct lp_build_sampler_aos *sampler,
+ const struct tgsi_shader_info *info);
+
+
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
new file mode 100644
index 0000000000..d5f963be58
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -0,0 +1,1176 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * TGSI to LLVM IR translation -- AoS.
+ *
+ * FIXME:
+ * - No control flow support: the existing control flow code should be factored
+ * out from the SoA code into a common module and shared.
+ * - No derivatives. Derivative logic should be pluggable, just like the samplers.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_config.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_scan.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_quad.h"
+#include "lp_bld_tgsi.h"
+#include "lp_bld_limits.h"
+#include "lp_bld_debug.h"
+
+
+#define LP_MAX_INSTRUCTIONS 256
+
+
+/*
+ * State shared by the helpers of the TGSI -> LLVM IR AoS translator
+ * (emit_fetch, emit_store and the swizzle wrappers all take a pointer to it).
+ */
+struct lp_build_tgsi_aos_context
+{
+ /* Builder whose type, undef/zero/one values and LLVM builder are used for
+ * the register values themselves. */
+ struct lp_build_context base;
+
+ /* Builder for integer masks and indices */
+ struct lp_build_context int_bld;
+
+ /*
+ * AoS swizzle used:
+ * - swizzles[0] = red index
+ * - swizzles[1] = green index
+ * - swizzles[2] = blue index
+ * - swizzles[3] = alpha index
+ */
+ unsigned char swizzles[4];
+ /* NOTE(review): presumably the inverse permutation of swizzles[]; it is
+ * used by swizzle_aos() to choose the destination slot -- confirm where it
+ * is initialised. */
+ unsigned char inv_swizzles[4];
+
+ /* Base pointer of the constants; emit_fetch indexes it with
+ * Register.Index*4 + chan, one scalar per channel. */
+ LLVMValueRef consts_ptr;
+ /* Input values, read directly by emit_fetch (no load). */
+ const LLVMValueRef *inputs;
+ /* Output slots; emit_store treats each entry as a pointer to store to. */
+ LLVMValueRef *outputs;
+
+ struct lp_build_sampler_aos *sampler;
+
+ LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
+ /* Entries of temps[] and preds[] are pointers: emit_fetch and emit_store
+ * read them through LLVMBuildLoad. */
+ LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
+ LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
+ LLVMValueRef preds[LP_MAX_TGSI_PREDS];
+
+ /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+ * set in the indirect_files field.
+ * The temps[] array above is unused then.
+ */
+ LLVMValueRef temps_array;
+
+ /** bitmask indicating which register files are accessed indirectly */
+ unsigned indirect_files;
+
+ struct tgsi_full_instruction *instructions;
+ uint max_instructions;
+};
+
+
+/**
+ * Wrapper around lp_build_swizzle_aos which translates swizzles to another
+ * ordering.
+ *
+ * swizzle_x..swizzle_w are the requested source channels (each must be < 4,
+ * asserted). For each of the four slots i, the requested channel is mapped
+ * through bld->swizzles[] and written at position bld->inv_swizzles[i] of
+ * the swizzle array handed to lp_build_swizzle_aos().
+ */
+static LLVMValueRef
+swizzle_aos(struct lp_build_tgsi_aos_context *bld,
+ LLVMValueRef a,
+ unsigned swizzle_x,
+ unsigned swizzle_y,
+ unsigned swizzle_z,
+ unsigned swizzle_w)
+{
+ unsigned char swizzles[4];
+
+ assert(swizzle_x < 4);
+ assert(swizzle_y < 4);
+ assert(swizzle_z < 4);
+ assert(swizzle_w < 4);
+
+ /* NOTE(review): every entry of swizzles[] is only initialised if
+ * inv_swizzles[] is a permutation of 0..3 -- confirm at the point where
+ * inv_swizzles is filled in. */
+ swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
+ swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
+ swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
+ swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
+
+ return lp_build_swizzle_aos(&bld->base, a, swizzles);
+}
+
+
+/**
+ * Wrapper around lp_build_swizzle_scalar_aos which first maps the channel
+ * through bld->swizzles[].
+ */
+static LLVMValueRef
+swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
+                   LLVMValueRef a,
+                   unsigned chan)
+{
+   return lp_build_swizzle_scalar_aos(&bld->base, a, bld->swizzles[chan]);
+}
+
+
+/**
+ * Register fetch.
+ *
+ * Emits the code that reads source operand src_op of inst and returns it
+ * with the operand's absolute/negate modifiers and swizzle applied.
+ *
+ * Indirect addressing is not supported (asserted). Constants, immediates,
+ * inputs and temporaries are handled; any other register file asserts and
+ * yields bld->base.undef.
+ */
+static LLVMValueRef
+emit_fetch(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned src_op)
+{
+   struct lp_type type = bld->base.type;
+   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+   LLVMValueRef res;
+   unsigned chan;
+
+   assert(!reg->Register.Indirect);
+
+   /*
+    * Fetch the value from the register file.
+    */
+
+   switch (reg->Register.File) {
+   case TGSI_FILE_CONSTANT:
+      /*
+       * Get the constants components
+       */
+
+      res = bld->base.undef;
+      for (chan = 0; chan < 4; ++chan) {
+         LLVMValueRef index;
+         LLVMValueRef scalar_ptr;
+         LLVMValueRef scalar;
+         LLVMValueRef swizzle;
+
+         index = LLVMConstInt(LLVMInt32Type(),
+                              reg->Register.Index*4 + chan,
+                              0);
+
+         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
+                                   &index, 1, "");
+
+         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+
+         lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
+
+         /*
+          * NOTE: constants array is always assumed to be RGBA
+          */
+
+         swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0);
+
+         res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, "");
+      }
+
+      /*
+       * Broadcast the first group of four channels to all the others.
+       *
+       * XXX: could be factored into a reusable function.
+       */
+
+      if (type.length > 4) {
+         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+         unsigned i;
+
+         for (chan = 0; chan < 4; ++chan) {
+            shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0);
+         }
+
+         for (i = 4; i < type.length; ++i) {
+            shuffles[i] = shuffles[i % 4];
+         }
+
+         res = LLVMBuildShuffleVector(bld->base.builder,
+                                      res, bld->base.undef,
+                                      LLVMConstVector(shuffles, type.length),
+                                      "");
+      }
+      break;
+
+   case TGSI_FILE_IMMEDIATE:
+      res = bld->immediates[reg->Register.Index];
+      assert(res);
+      break;
+
+   case TGSI_FILE_INPUT:
+      res = bld->inputs[reg->Register.Index];
+      assert(res);
+      break;
+
+   case TGSI_FILE_TEMPORARY:
+      {
+         LLVMValueRef temp_ptr;
+         temp_ptr = bld->temps[reg->Register.Index];
+         /*
+          * Check the slot before loading through it: a temp that was never
+          * set up would otherwise be dereferenced by LLVMBuildLoad before
+          * any check could run.
+          */
+         if (!temp_ptr)
+            return bld->base.undef;
+         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
+         if (!res)
+            return bld->base.undef;
+      }
+      break;
+
+   default:
+      assert(0 && "invalid src register in emit_fetch()");
+      return bld->base.undef;
+   }
+
+   /*
+    * Apply sign modifier.
+    */
+
+   if (reg->Register.Absolute) {
+      res = lp_build_abs(&bld->base, res);
+   }
+
+   if (reg->Register.Negate) {
+      res = lp_build_negate(&bld->base, res);
+   }
+
+   /*
+    * Swizzle the argument
+    */
+
+   res = swizzle_aos(bld, res,
+                     reg->Register.SwizzleX,
+                     reg->Register.SwizzleY,
+                     reg->Register.SwizzleZ,
+                     reg->Register.SwizzleW);
+
+   return res;
+}
+
+
+/**
+ * Register store.
+ *
+ * Writes \p value to destination operand \p index of \p inst.  The value is
+ * first clamped according to the instruction's saturate mode; if the
+ * instruction is predicated and/or the destination has a partial write mask,
+ * the current register contents are loaded and blended with \p value so that
+ * only the enabled channels change.
+ *
+ * \param bld    AoS translation context
+ * \param inst   instruction whose destination is being written
+ * \param index  destination operand of \p inst to write
+ * \param value  vector to store
+ */
+static void
+emit_store(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned index,
+   LLVMValueRef value)
+{
+   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
+   LLVMValueRef mask = NULL;
+   LLVMValueRef ptr;
+
+   /*
+    * Saturate the value
+    */
+
+   switch (inst->Instruction.Saturate) {
+   case TGSI_SAT_NONE:
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+      value = lp_build_max(&bld->base, value, bld->base.zero);
+      value = lp_build_min(&bld->base, value, bld->base.one);
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      value = lp_build_max(&bld->base, value,
+                           lp_build_const_vec(bld->base.type, -1.0));
+      value = lp_build_min(&bld->base, value, bld->base.one);
+      break;
+
+   default:
+      assert(0);
+   }
+
+   /*
+    * Translate the register file
+    */
+
+   assert(!reg->Register.Indirect);
+
+   switch (reg->Register.File) {
+   case TGSI_FILE_OUTPUT:
+      ptr = bld->outputs[reg->Register.Index];
+      break;
+
+   case TGSI_FILE_TEMPORARY:
+      ptr = bld->temps[reg->Register.Index];
+      break;
+
+   case TGSI_FILE_ADDRESS:
+      /*
+       * Indirect destinations are rejected by the assert above, so
+       * reg->Indirect carries nothing meaningful here.  The address
+       * register being written is selected by Register.Index, exactly as
+       * for the other register files.
+       */
+      ptr = bld->addr[reg->Register.Index];
+      break;
+
+   case TGSI_FILE_PREDICATE:
+      ptr = bld->preds[reg->Register.Index];
+      break;
+
+   default:
+      assert(0);
+      return;
+   }
+
+   /*
+    * Predicate
+    */
+
+   if (inst->Instruction.Predicate) {
+      LLVMValueRef pred;
+
+      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
+
+      pred = LLVMBuildLoad(bld->base.builder,
+                           bld->preds[inst->Predicate.Index], "");
+
+      /*
+       * Convert the value to an integer mask.
+       */
+      pred = lp_build_compare(bld->base.builder,
+                              bld->base.type,
+                              PIPE_FUNC_NOTEQUAL,
+                              pred,
+                              bld->base.zero);
+
+      if (inst->Predicate.Negate) {
+         pred = LLVMBuildNot(bld->base.builder, pred, "");
+      }
+
+      pred = swizzle_aos(bld, pred,
+                         inst->Predicate.SwizzleX,
+                         inst->Predicate.SwizzleY,
+                         inst->Predicate.SwizzleZ,
+                         inst->Predicate.SwizzleW);
+
+      /* Nothing has contributed to the mask yet, so the predicate is it. */
+      mask = pred;
+   }
+
+   /*
+    * Writemask
+    */
+
+   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
+      LLVMValueRef writemask;
+
+      writemask = lp_build_const_mask_aos(bld->base.type,
+                                          reg->Register.WriteMask);
+
+      if (mask) {
+         mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
+      } else {
+         mask = writemask;
+      }
+   }
+
+   /*
+    * With a mask in effect, keep the old contents of the disabled channels
+    * by selecting between the new value and what is currently stored.
+    */
+
+   if (mask) {
+      LLVMValueRef orig_value;
+
+      orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
+      value = lp_build_select(&bld->base,
+                              mask, value, orig_value);
+   }
+
+   LLVMBuildStore(bld->base.builder, value, ptr);
+}
+
+
+/**
+ * High-level instruction translators.
+ */
+
+/**
+ * Emit a texture fetch for a TEX-family instruction.
+ *
+ * Source 0 supplies the coordinates.  With explicit derivatives, sources 1
+ * and 2 supply ddx/ddy and source 3 names the sampler unit; otherwise the
+ * derivatives are a placeholder constant and source 1 names the unit.
+ *
+ * \return the value produced by the sampler generator's emit_fetch_texel
+ *         hook, or an undefined vector (after a warning) when the context
+ *         has no sampler generator.
+ */
+static LLVMValueRef
+emit_tex(struct lp_build_tgsi_aos_context *bld,
+         const struct tgsi_full_instruction *inst,
+         enum lp_build_tex_modifier modifier)
+{
+   unsigned tex_target;
+   unsigned sampler_unit;
+   LLVMValueRef texcoords;
+   LLVMValueRef deriv_x;
+   LLVMValueRef deriv_y;
+
+   if (bld->sampler == NULL) {
+      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
+      return bld->base.undef;
+   }
+
+   tex_target = inst->Texture.Texture;
+   texcoords = emit_fetch(bld, inst, 0);
+
+   if (modifier != LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+      /*
+       * TODO: implicit derivatives are not computed yet; the intended
+       * form is lp_build_ddx()/lp_build_ddy() on the coordinates.  Until
+       * then a constant one is passed for both.
+       */
+      deriv_x = bld->base.one;
+      deriv_y = bld->base.one;
+      sampler_unit = inst->Src[1].Register.Index;
+   } else {
+      deriv_x = emit_fetch(bld, inst, 1);
+      deriv_y = emit_fetch(bld, inst, 2);
+      sampler_unit = inst->Src[3].Register.Index;
+   }
+
+   return bld->sampler->emit_fetch_texel(bld->sampler,
+                                         &bld->base,
+                                         tex_target, sampler_unit,
+                                         texcoords, deriv_x, deriv_y,
+                                         modifier);
+}
+
+
+/**
+ * Allocate storage for the registers named by one TGSI declaration.
+ *
+ * For every index in the declared range a vector-sized stack slot is
+ * created for temporaries, outputs, address and predicate registers.
+ * Other register files need no storage here.
+ *
+ * \param bld   AoS translation context; its temps/outputs/addr/preds
+ *              tables (or temps_array) are filled in
+ * \param decl  declaration to process
+ */
+static void
+emit_declaration(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_declaration *decl)
+{
+   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
+
+   unsigned first = decl->Range.First;
+   unsigned last = decl->Range.Last;
+   unsigned idx;
+
+   for (idx = first; idx <= last; ++idx) {
+      switch (decl->Declaration.File) {
+      case TGSI_FILE_TEMPORARY:
+         assert(idx < LP_MAX_TGSI_TEMPS);
+         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+            /*
+             * Indirectly addressed temporaries live in one array sized
+             * for the whole range.  Emit it once per declaration rather
+             * than once per register, which would only leave dead
+             * allocas behind.
+             *
+             * NOTE(review): bld->temps[] is left unset in this mode while
+             * the fetch/store code visible in this file indexes
+             * bld->temps[] directly -- confirm indirect temporaries are
+             * handled elsewhere.
+             */
+            if (idx == first) {
+               LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
+                                                      last + 1, 0);
+               bld->temps_array = lp_build_array_alloca(bld->base.builder,
+                                                        vec_type, array_size, "");
+            }
+         } else {
+            bld->temps[idx] = lp_build_alloca(bld->base.builder,
+                                              vec_type, "");
+         }
+         break;
+
+      case TGSI_FILE_OUTPUT:
+         bld->outputs[idx] = lp_build_alloca(bld->base.builder,
+                                             vec_type, "");
+         break;
+
+      case TGSI_FILE_ADDRESS:
+         assert(idx < LP_MAX_TGSI_ADDRS);
+         bld->addr[idx] = lp_build_alloca(bld->base.builder,
+                                          vec_type, "");
+         break;
+
+      case TGSI_FILE_PREDICATE:
+         assert(idx < LP_MAX_TGSI_PREDS);
+         bld->preds[idx] = lp_build_alloca(bld->base.builder,
+                                           vec_type, "");
+         break;
+
+      default:
+         /* don't need to declare other vars */
+         break;
+      }
+   }
+}
+
+
+/**
+ * Emit LLVM for one TGSI instruction.
+ *
+ * \param bld   AoS translation context
+ * \param inst  instruction to translate
+ * \param info  opcode description for \p inst; only num_dst is consulted
+ * \param pc    in/out program counter: advanced by one on entry and set
+ *              to -1 when the END opcode is reached
+ * \return TRUE for success, FALSE if the opcode is not translated
+ */
+static boolean
+emit_instruction(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   const struct tgsi_opcode_info *info,
+   int *pc)
+{
+   LLVMValueRef src0, src1, src2;
+   LLVMValueRef tmp0, tmp1;
+   LLVMValueRef dst0 = bld->base.undef;
+
+   /*
+    * Stores and write masks are handled in a general fashion after the long
+    * instruction opcode switch statement.
+    *
+    * Although not strictly necessary, we avoid generating instructions for
+    * channels which won't be stored, in cases where that's easy. For some
+    * complex instructions, like texture sampling, it is more convenient to
+    * assume a full writemask and then let LLVM optimization passes eliminate
+    * redundant code.
+    */
+
+   (*pc)++;
+
+   assert(info->num_dst <= 1);
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ARL:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_floor(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_MOV:
+      dst0 = emit_fetch(bld, inst, 0);
+      break;
+
+   case TGSI_OPCODE_LIT:
+      return FALSE;
+
+   case TGSI_OPCODE_RCP:
+      /* TGSI_OPCODE_RECIP */
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_rcp(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_RSQ:
+      /* TGSI_OPCODE_RECIPSQRT */
+      src0 = emit_fetch(bld, inst, 0);
+      tmp0 = lp_build_abs(&bld->base, src0);
+      dst0 = lp_build_rsqrt(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_EXP:
+   case TGSI_OPCODE_LOG:
+      return FALSE;
+
+   case TGSI_OPCODE_MUL:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_mul(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_ADD:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_add(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_DP3:
+      /* TGSI_OPCODE_DOT3 */
+   case TGSI_OPCODE_DP4:
+      /* TGSI_OPCODE_DOT4 */
+   case TGSI_OPCODE_DST:
+      return FALSE;
+
+   case TGSI_OPCODE_MIN:
+      /* MIN must take the smaller operand; this used to call lp_build_max. */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_min(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_MAX:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_max(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_SLT:
+      /* TGSI_OPCODE_SETLT */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
+      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      break;
+
+   case TGSI_OPCODE_SGE:
+      /* TGSI_OPCODE_SETGE */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
+      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      break;
+
+   case TGSI_OPCODE_MAD:
+      /* TGSI_OPCODE_MADD */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp0 = lp_build_mul(&bld->base, src0, src1);
+      dst0 = lp_build_add(&bld->base, tmp0, src2);
+      break;
+
+   case TGSI_OPCODE_SUB:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_sub(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_LRP:
+      /* src0 * (src1 - src2) + src2 */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp0 = lp_build_sub(&bld->base, src1, src2);
+      tmp0 = lp_build_mul(&bld->base, src0, tmp0);
+      dst0 = lp_build_add(&bld->base, tmp0, src2);
+      break;
+
+   case TGSI_OPCODE_CND:
+      /* src2 > 0.5 ? src0 : src1 */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp1 = lp_build_const_vec(bld->base.type, 0.5);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
+      dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
+      break;
+
+   case TGSI_OPCODE_DP2A:
+      return FALSE;
+
+   case TGSI_OPCODE_FRC:
+      src0 = emit_fetch(bld, inst, 0);
+      tmp0 = lp_build_floor(&bld->base, src0);
+      dst0 = lp_build_sub(&bld->base, src0, tmp0);
+      break;
+
+   case TGSI_OPCODE_CLAMP:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp0 = lp_build_max(&bld->base, src0, src1);
+      dst0 = lp_build_min(&bld->base, tmp0, src2);
+      break;
+
+   case TGSI_OPCODE_FLR:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_floor(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_ROUND:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_round(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_EX2:
+      /*
+       * Use the context-aware scalar swizzle, like the other scalar
+       * opcodes below (LG2, POW, COS, SIN), instead of calling
+       * lp_build_swizzle_scalar_aos() directly.
+       */
+      src0 = emit_fetch(bld, inst, 0);
+      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
+      dst0 = lp_build_exp2(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_LG2:
+      src0 = emit_fetch(bld, inst, 0);
+      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
+      dst0 = lp_build_log2(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_POW:
+      src0 = emit_fetch(bld, inst, 0);
+      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
+      src1 = emit_fetch(bld, inst, 1);
+      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
+      dst0 = lp_build_pow(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_XPD:
+      return FALSE;
+
+   case TGSI_OPCODE_ABS:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_abs(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_RCC:
+      /* deprecated? */
+      assert(0);
+      return FALSE;
+
+   case TGSI_OPCODE_DPH:
+      return FALSE;
+
+   case TGSI_OPCODE_COS:
+      src0 = emit_fetch(bld, inst, 0);
+      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
+      dst0 = lp_build_cos(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_DDX:
+   case TGSI_OPCODE_DDY:
+   case TGSI_OPCODE_KILP:
+      /* predicated kill */
+   case TGSI_OPCODE_KIL:
+      /* conditional kill */
+   case TGSI_OPCODE_PK2H:
+   case TGSI_OPCODE_PK2US:
+   case TGSI_OPCODE_PK4B:
+   case TGSI_OPCODE_PK4UB:
+   case TGSI_OPCODE_RFL:
+      return FALSE;
+
+   case TGSI_OPCODE_SEQ:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
+      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      break;
+
+   case TGSI_OPCODE_SFL:
+      dst0 = bld->base.zero;
+      break;
+
+   case TGSI_OPCODE_SGT:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
+      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      break;
+
+   case TGSI_OPCODE_SIN:
+      src0 = emit_fetch(bld, inst, 0);
+      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
+      dst0 = lp_build_sin(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_SLE:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
+      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      break;
+
+   case TGSI_OPCODE_SNE:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
+      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      break;
+
+   case TGSI_OPCODE_STR:
+      dst0 = bld->base.one;
+      break;
+
+   case TGSI_OPCODE_TEX:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
+      break;
+
+   case TGSI_OPCODE_TXD:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
+      break;
+
+   case TGSI_OPCODE_UP2H:
+   case TGSI_OPCODE_UP2US:
+   case TGSI_OPCODE_UP4B:
+   case TGSI_OPCODE_UP4UB:
+   case TGSI_OPCODE_X2D:
+   case TGSI_OPCODE_ARA:
+      /* deprecated */
+      assert(0);
+      return FALSE;
+
+   case TGSI_OPCODE_ARR:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_round(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_BRA:
+      /* deprecated */
+      assert(0);
+      return FALSE;
+
+   case TGSI_OPCODE_CAL:
+   case TGSI_OPCODE_RET:
+      return FALSE;
+
+   case TGSI_OPCODE_END:
+      /* tell the caller to stop iterating */
+      *pc = -1;
+      break;
+
+   case TGSI_OPCODE_SSG:
+      /* TGSI_OPCODE_SGN */
+      tmp0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_sgn(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_CMP:
+      /* src0 < 0 ? src1 : src2 */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
+      dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
+      break;
+
+   case TGSI_OPCODE_SCS:
+      return FALSE;
+
+   case TGSI_OPCODE_TXB:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
+      break;
+
+   case TGSI_OPCODE_NRM:
+      /* fall-through */
+   case TGSI_OPCODE_NRM4:
+      return FALSE;
+
+   case TGSI_OPCODE_DIV:
+      /* deprecated */
+      assert(0);
+      return FALSE;
+
+   case TGSI_OPCODE_DP2:
+      return FALSE;
+
+   case TGSI_OPCODE_TXL:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
+      break;
+
+   case TGSI_OPCODE_TXP:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
+      break;
+
+   case TGSI_OPCODE_BRK:
+   case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_BGNSUB:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDIF:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_ENDSUB:
+      /* control flow is not translated */
+      return FALSE;
+
+   case TGSI_OPCODE_PUSHA:
+   case TGSI_OPCODE_POPA:
+      /* deprecated? */
+      assert(0);
+      return FALSE;
+
+   case TGSI_OPCODE_CEIL:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_ceil(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_NOT:
+      /* deprecated? */
+      assert(0);
+      return FALSE;
+
+   case TGSI_OPCODE_TRUNC:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_trunc(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_MOD:
+   case TGSI_OPCODE_XOR:
+   case TGSI_OPCODE_SAD:
+   case TGSI_OPCODE_TXF:
+   case TGSI_OPCODE_TXQ:
+      /* deprecated? */
+      assert(0);
+      return FALSE;
+
+   case TGSI_OPCODE_CONT:
+   case TGSI_OPCODE_EMIT:
+   case TGSI_OPCODE_ENDPRIM:
+      return FALSE;
+
+   case TGSI_OPCODE_NOP:
+      break;
+
+   default:
+      return FALSE;
+   }
+
+   if (info->num_dst) {
+      emit_store(bld, inst, 0, dst0);
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate a TGSI token stream into LLVM IR using AoS vectors.
+ *
+ * The stream is parsed once: declarations allocate register storage,
+ * immediates become constant vectors, and instructions are copied into a
+ * growable array.  The saved instructions are then translated in order
+ * until END is reached or the saved instructions run out.
+ *
+ * \param builder     LLVM builder the code is emitted with
+ * \param tokens      TGSI token stream to translate
+ * \param type        vector type used for all registers
+ * \param swizzles    for each of the four TGSI channels, the vector
+ *                    element it is placed in; the inverse table is derived
+ *                    here
+ * \param consts_ptr  pointer to the constants, stored in the context
+ * \param inputs      input register values, stored in the context
+ * \param outputs     table filled with the output register slots
+ * \param sampler     texture sampling code generator, may be NULL
+ * \param info        shader info; only indirect_files is consulted
+ */
+void
+lp_build_tgsi_aos(LLVMBuilderRef builder,
+                  const struct tgsi_token *tokens,
+                  struct lp_type type,
+                  const unsigned char swizzles[4],
+                  LLVMValueRef consts_ptr,
+                  const LLVMValueRef *inputs,
+                  LLVMValueRef *outputs,
+                  struct lp_build_sampler_aos *sampler,
+                  const struct tgsi_shader_info *info)
+{
+   struct lp_build_tgsi_aos_context bld;
+   struct tgsi_parse_context parse;
+   uint num_immediates = 0;
+   uint num_instructions = 0;
+   unsigned chan;
+   int pc = 0;
+
+   /* Setup build context */
+   memset(&bld, 0, sizeof bld);
+   lp_build_context_init(&bld.base, builder, type);
+   lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
+
+   for (chan = 0; chan < 4; ++chan) {
+      bld.swizzles[chan] = swizzles[chan];
+      bld.inv_swizzles[swizzles[chan]] = chan;
+   }
+
+   bld.inputs = inputs;
+   bld.outputs = outputs;
+   bld.consts_ptr = consts_ptr;
+   bld.sampler = sampler;
+   bld.indirect_files = info->indirect_files;
+   bld.instructions = (struct tgsi_full_instruction *)
+      MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
+   bld.max_instructions = LP_MAX_INSTRUCTIONS;
+
+   if (!bld.instructions) {
+      return;
+   }
+
+   tgsi_parse_init(&parse, tokens);
+
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch(parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* Inputs already interpolated */
+         emit_declaration(&bld, &parse.FullToken.FullDeclaration);
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            /* save expanded instruction */
+            if (num_instructions == bld.max_instructions) {
+               struct tgsi_full_instruction *instructions;
+               instructions = REALLOC(bld.instructions,
+                                      bld.max_instructions
+                                      * sizeof(struct tgsi_full_instruction),
+                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
+                                      * sizeof(struct tgsi_full_instruction));
+               if (!instructions) {
+                  /*
+                   * Out of memory: this instruction is dropped and the old
+                   * array stays valid.  The emit loop below is bounded by
+                   * num_instructions, so a missing END cannot make it run
+                   * off the array.
+                   */
+                  break;
+               }
+               bld.instructions = instructions;
+               bld.max_instructions += LP_MAX_INSTRUCTIONS;
+            }
+
+            memcpy(bld.instructions + num_instructions,
+                   &parse.FullToken.FullInstruction,
+                   sizeof(bld.instructions[0]));
+
+            num_instructions++;
+         }
+
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* simply copy the immediate values into the next immediates[] slot */
+         {
+            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+            float imm[4];
+            assert(size <= 4);
+            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
+            for (chan = 0; chan < 4; ++chan) {
+               imm[chan] = 0.0f;
+            }
+            /* place each value in the element its channel is swizzled to */
+            for (chan = 0; chan < size; ++chan) {
+               unsigned swizzle = bld.swizzles[chan];
+               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
+            }
+            bld.immediates[num_immediates] =
+                     lp_build_const_aos(type,
+                                        imm[0], imm[1], imm[2], imm[3],
+                                        NULL);
+            num_immediates++;
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         break;
+
+      default:
+         assert(0);
+      }
+   }
+
+   /*
+    * Translate the saved instructions.  emit_instruction() advances pc and
+    * sets it to -1 at END; additionally stop once every saved instruction
+    * has been consumed, so a stream without END (or one truncated by an
+    * allocation failure above) never reads past the stored instructions.
+    */
+   while (pc != -1 && (uint) pc < num_instructions) {
+      struct tgsi_full_instruction *instr = bld.instructions + pc;
+      const struct tgsi_opcode_info *opcode_info =
+         tgsi_get_opcode_info(instr->Instruction.Opcode);
+      if (!emit_instruction(&bld, instr, opcode_info, &pc))
+         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+                       opcode_info->mnemonic);
+   }
+
+   if (0) {
+      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+      LLVMValueRef function = LLVMGetBasicBlockParent(block);
+      debug_printf("11111111111111111111111111111 \n");
+      tgsi_dump(tokens, 0);
+      lp_debug_dump_value(function);
+      debug_printf("2222222222222222222222222222 \n");
+   }
+   tgsi_parse_free(&parse);
+
+   if (0) {
+      LLVMModuleRef module = LLVMGetGlobalParent(
+         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
+      LLVMDumpModule(module);
+   }
+
+   FREE(bld.instructions);
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 0e07f7f3f3..cd5b132b41 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -612,7 +612,6 @@ emit_fetch(
break;
case TGSI_UTIL_SIGN_SET:
- /* TODO: Use bitwese OR for floating point */
res = lp_build_abs( &bld->base, res );
/* fall through */
case TGSI_UTIL_SIGN_TOGGLE:
@@ -817,18 +816,10 @@ emit_store(
* High-level instruction translators.
*/
-enum tex_modifier {
- TEX_MODIFIER_NONE = 0,
- TEX_MODIFIER_PROJECTED,
- TEX_MODIFIER_LOD_BIAS,
- TEX_MODIFIER_EXPLICIT_LOD,
- TEX_MODIFIER_EXPLICIT_DERIV
-};
-
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
- enum tex_modifier modifier,
+ enum lp_build_tex_modifier modifier,
LLVMValueRef *texel)
{
unsigned unit;
@@ -868,11 +859,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
return;
}
- if (modifier == TEX_MODIFIER_LOD_BIAS) {
+ if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
lod_bias = emit_fetch( bld, inst, 0, 3 );
explicit_lod = NULL;
}
- else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
+ else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
lod_bias = NULL;
explicit_lod = emit_fetch( bld, inst, 0, 3 );
}
@@ -881,21 +872,21 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
explicit_lod = NULL;
}
- if (modifier == TEX_MODIFIER_PROJECTED) {
+ if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
oow = emit_fetch( bld, inst, 0, 3 );
oow = lp_build_rcp(&bld->base, oow);
}
for (i = 0; i < num_coords; i++) {
coords[i] = emit_fetch( bld, inst, 0, i );
- if (modifier == TEX_MODIFIER_PROJECTED)
+ if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
for (i = num_coords; i < 3; i++) {
coords[i] = bld->base.undef;
}
- if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
+ if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
for (i = 0; i < num_coords; i++) {
ddx[i] = emit_fetch( bld, inst, 1, i );
ddy[i] = emit_fetch( bld, inst, 2, i );
@@ -1628,11 +1619,11 @@ emit_instruction(
break;
case TGSI_OPCODE_TEX:
- emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
break;
case TGSI_OPCODE_TXD:
- emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
break;
case TGSI_OPCODE_UP2H:
@@ -1736,7 +1727,7 @@ emit_instruction(
break;
case TGSI_OPCODE_TXB:
- emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
break;
case TGSI_OPCODE_NRM:
@@ -1841,11 +1832,11 @@ emit_instruction(
break;
case TGSI_OPCODE_TXL:
- emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
break;
case TGSI_OPCODE_TXP:
- emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
+ emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
break;
case TGSI_OPCODE_BRK:
@@ -2063,11 +2054,16 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
{
/* save expanded instruction */
if (num_instructions == bld.max_instructions) {
- bld.instructions = REALLOC(bld.instructions,
- bld.max_instructions
- * sizeof(struct tgsi_full_instruction),
- (bld.max_instructions + LP_MAX_INSTRUCTIONS)
- * sizeof(struct tgsi_full_instruction));
+ struct tgsi_full_instruction *instructions;
+ instructions = REALLOC(bld.instructions,
+ bld.max_instructions
+ * sizeof(struct tgsi_full_instruction),
+ (bld.max_instructions + LP_MAX_INSTRUCTIONS)
+ * sizeof(struct tgsi_full_instruction));
+ if (!instructions) {
+ break;
+ }
+ bld.instructions = instructions;
bld.max_instructions += LP_MAX_INSTRUCTIONS;
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index a6c50dcf0c..5a13f39849 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -130,7 +130,7 @@ struct pb_vtbl
* flags is bitmask of PB_USAGE_CPU_READ/WRITE.
*/
void *(*map)( struct pb_buffer *buf,
- unsigned flags );
+ unsigned flags, void *flush_ctx );
void (*unmap)( struct pb_buffer *buf );
@@ -164,13 +164,13 @@ struct pb_vtbl
*/
static INLINE void *
pb_map(struct pb_buffer *buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
assert(buf);
if(!buf)
return NULL;
assert(pipe_is_referenced(&buf->base.reference));
- return buf->vtbl->map(buf, flags);
+ return buf->vtbl->map(buf, flags, flush_ctx);
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index d6cf640582..c310f28f51 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -624,7 +624,7 @@ fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf)
assert(fenced_buf->data);
assert(fenced_buf->buffer);
- map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE);
+ map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE, NULL);
if(!map)
return PIPE_ERROR;
@@ -644,7 +644,7 @@ fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf)
assert(fenced_buf->data);
assert(fenced_buf->buffer);
- map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ);
+ map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ, NULL);
if(!map)
return PIPE_ERROR;
@@ -674,7 +674,7 @@ fenced_buffer_destroy(struct pb_buffer *buf)
static void *
fenced_buffer_map(struct pb_buffer *buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_manager *fenced_mgr = fenced_buf->mgr;
@@ -712,7 +712,7 @@ fenced_buffer_map(struct pb_buffer *buf,
}
if(fenced_buf->buffer) {
- map = pb_map(fenced_buf->buffer, flags);
+ map = pb_map(fenced_buf->buffer, flags, flush_ctx);
}
else {
assert(fenced_buf->data);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index b706f429be..c2322eed19 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -70,7 +70,8 @@ malloc_buffer_destroy(struct pb_buffer *buf)
static void *
malloc_buffer_map(struct pb_buffer *buf,
- unsigned flags)
+ unsigned flags,
+ void *flush_ctx)
{
return malloc_buffer(buf)->data;
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 88501e8d72..b4d8107372 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -167,10 +167,10 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
static void *
pb_cache_buffer_map(struct pb_buffer *_buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
- return pb_map(buf->buffer, flags);
+ return pb_map(buf->buffer, flags, flush_ctx);
}
@@ -242,7 +242,7 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
if(!pb_check_usage(desc->usage, buf->base.base.usage))
return FALSE;
- map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK);
+ map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
if (!map) {
return FALSE;
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index 0dc5b31a75..7604e75af8 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -158,7 +158,7 @@ pb_debug_buffer_fill(struct pb_debug_buffer *buf)
{
uint8_t *map;
- map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE);
+ map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE, NULL);
assert(map);
if(map) {
fill_random_pattern(map, buf->underflow_size);
@@ -181,7 +181,7 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf)
map = pb_map(buf->buffer,
PB_USAGE_CPU_READ |
- PB_USAGE_UNSYNCHRONIZED);
+ PB_USAGE_UNSYNCHRONIZED, NULL);
assert(map);
if(map) {
boolean underflow, overflow;
@@ -247,14 +247,14 @@ pb_debug_buffer_destroy(struct pb_buffer *_buf)
static void *
pb_debug_buffer_map(struct pb_buffer *_buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
void *map;
pb_debug_buffer_check(buf);
- map = pb_map(buf->buffer, flags);
+ map = pb_map(buf->buffer, flags, flush_ctx);
if(!map)
return NULL;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index faf7c35267..88da786216 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -108,11 +108,14 @@ mm_buffer_destroy(struct pb_buffer *buf)
static void *
mm_buffer_map(struct pb_buffer *buf,
- unsigned flags)
+ unsigned flags,
+ void *flush_ctx)
{
struct mm_buffer *mm_buf = mm_buffer(buf);
struct mm_pb_manager *mm = mm_buf->mgr;
+ /* XXX: it will be necessary to remap here to propagate flush_ctx */
+
return (unsigned char *) mm->map + mm_buf->block->ofs;
}
@@ -269,7 +272,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
mm->map = pb_map(mm->buffer,
PB_USAGE_CPU_READ |
- PB_USAGE_CPU_WRITE);
+ PB_USAGE_CPU_WRITE, NULL);
if(!mm->map)
goto failure;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
index 31f1ebbeb7..694a092f3c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
@@ -103,13 +103,13 @@ pb_ondemand_buffer_destroy(struct pb_buffer *_buf)
static void *
pb_ondemand_buffer_map(struct pb_buffer *_buf,
- unsigned flags)
+ unsigned flags, void *flush_ctx)
{
struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
if(buf->buffer) {
assert(!buf->data);
- return pb_map(buf->buffer, flags);
+ return pb_map(buf->buffer, flags, flush_ctx);
}
else {
assert(buf->data);
@@ -150,7 +150,7 @@ pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf)
if(!buf->buffer)
return PIPE_ERROR_OUT_OF_MEMORY;
- map = pb_map(buf->buffer, PB_USAGE_CPU_READ);
+ map = pb_map(buf->buffer, PB_USAGE_CPU_READ, NULL);
if(!map) {
pb_reference(&buf->buffer, NULL);
return PIPE_ERROR;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
index fdcce42878..2f7c7389ff 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -118,12 +118,14 @@ pool_buffer_destroy(struct pb_buffer *buf)
static void *
-pool_buffer_map(struct pb_buffer *buf, unsigned flags)
+pool_buffer_map(struct pb_buffer *buf, unsigned flags, void *flush_ctx)
{
struct pool_buffer *pool_buf = pool_buffer(buf);
struct pool_pb_manager *pool = pool_buf->mgr;
void *map;
+ /* XXX: it will be necessary to remap here to propagate flush_ctx */
+
pipe_mutex_lock(pool->mutex);
map = (unsigned char *) pool->map + pool_buf->start;
pipe_mutex_unlock(pool->mutex);
@@ -285,7 +287,7 @@ pool_bufmgr_create(struct pb_manager *provider,
pool->map = pb_map(pool->buffer,
PB_USAGE_CPU_READ |
- PB_USAGE_CPU_WRITE);
+ PB_USAGE_CPU_WRITE, NULL);
if(!pool->map)
goto failure;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 7a3305aaf3..176f9aa38a 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -227,10 +227,13 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf)
static void *
pb_slab_buffer_map(struct pb_buffer *_buf,
- unsigned flags)
+ unsigned flags,
+ void *flush_ctx)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+ /* XXX: it will be necessary to remap here to propagate flush_ctx */
+
++buf->mapCount;
return (void *) ((uint8_t *) buf->slab->virtual + buf->start);
}
@@ -316,7 +319,7 @@ pb_slab_create(struct pb_slab_manager *mgr)
* through this address so it is required that the buffer is pinned. */
slab->virtual = pb_map(slab->bo,
PB_USAGE_CPU_READ |
- PB_USAGE_CPU_WRITE);
+ PB_USAGE_CPU_WRITE, NULL);
if(!slab->virtual) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out_err1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 6dee362d58..9d62c1d7e7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -377,6 +377,36 @@ tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
const unsigned *buf_sizes);
+static INLINE int
+tgsi_exec_get_shader_param(enum pipe_shader_cap param)
+{ /* Answer a pipe_shader_cap query with the matching TGSI_EXEC_ / PIPE_ limit; takes no shader-stage argument. */
+ switch(param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return INT_MAX; /* all four instruction/indirection caps share this answer */
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return TGSI_EXEC_MAX_NESTING;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ return TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return TGSI_EXEC_MAX_CONST_BUFFER;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return PIPE_MAX_CONSTANT_BUFFERS;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return TGSI_EXEC_NUM_TEMPS;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return TGSI_EXEC_NUM_ADDRS;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return TGSI_EXEC_NUM_PREDS;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1; /* NOTE(review): presumably a boolean cap where 1 means "supported" - confirm against p_defines.h */
+ default:
+ return 0; /* any cap not listed above answers 0 */
+ }
+}
+
#if defined __cplusplus
} /* extern "C" */
#endif
diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c
index 94d5bd3027..e209a98b70 100644
--- a/src/gallium/auxiliary/util/u_caps.c
+++ b/src/gallium/auxiliary/util/u_caps.c
@@ -75,6 +75,14 @@ util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out)
return FALSE;
}
break;
+ case UTIL_CAPS_CHECK_SHADER:
+ tmpi = screen->get_shader_param(screen, list[i] >> 24, list[i] & ((1 << 24) - 1));
+ ++i;
+ if (tmpi < (int)list[i++]) {
+ *out = i - 3;
+ return FALSE;
+ }
+ break;
case UTIL_CAPS_CHECK_UNIMPLEMENTED:
*out = i - 1;
return FALSE;
@@ -188,17 +196,17 @@ static unsigned caps_opengl_2_1[] = {
/* Shader Model 3 */
static unsigned caps_sm3[] = {
- UTIL_CHECK_INT(MAX_FS_INSTRUCTIONS, 512),
- UTIL_CHECK_INT(MAX_FS_INPUTS, 10),
- UTIL_CHECK_INT(MAX_FS_TEMPS, 32),
- UTIL_CHECK_INT(MAX_FS_ADDRS, 1),
- UTIL_CHECK_INT(MAX_FS_CONSTS, 224),
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_INSTRUCTIONS, 512),
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_INPUTS, 10),
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_TEMPS, 32),
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_ADDRS, 1),
+ UTIL_CHECK_SHADER(FRAGMENT, MAX_CONSTS, 224),
- UTIL_CHECK_INT(MAX_VS_INSTRUCTIONS, 512),
- UTIL_CHECK_INT(MAX_VS_INPUTS, 16),
- UTIL_CHECK_INT(MAX_VS_TEMPS, 32),
- UTIL_CHECK_INT(MAX_VS_ADDRS, 2),
- UTIL_CHECK_INT(MAX_VS_CONSTS, 256),
+ UTIL_CHECK_SHADER(VERTEX, MAX_INSTRUCTIONS, 512),
+ UTIL_CHECK_SHADER(VERTEX, MAX_INPUTS, 16),
+ UTIL_CHECK_SHADER(VERTEX, MAX_TEMPS, 32),
+ UTIL_CHECK_SHADER(VERTEX, MAX_ADDRS, 2),
+ UTIL_CHECK_SHADER(VERTEX, MAX_CONSTS, 256),
UTIL_CHECK_TERMINATE
};
diff --git a/src/gallium/auxiliary/util/u_caps.h b/src/gallium/auxiliary/util/u_caps.h
index b1074f9eb2..7bd2380041 100644
--- a/src/gallium/auxiliary/util/u_caps.h
+++ b/src/gallium/auxiliary/util/u_caps.h
@@ -38,6 +38,7 @@ enum u_caps_check_enum {
UTIL_CAPS_CHECK_INT,
UTIL_CAPS_CHECK_FLOAT,
UTIL_CAPS_CHECK_FORMAT,
+ UTIL_CAPS_CHECK_SHADER,
UTIL_CAPS_CHECK_UNIMPLEMENTED,
};
@@ -54,6 +55,9 @@ enum u_caps_check_enum {
#define UTIL_CHECK_FORMAT(format) \
UTIL_CAPS_CHECK_FORMAT, PIPE_FORMAT_##format
+#define UTIL_CHECK_SHADER(shader, cap, higher) \
+ UTIL_CAPS_CHECK_SHADER, (PIPE_SHADER_##shader << 24) | PIPE_SHADER_CAP_##cap, (unsigned)(higher)
+
#define UTIL_CHECK_UNIMPLEMENTED \
UTIL_CAPS_CHECK_UNIMPLEMENTED
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
index 43d09f1960..a4ee91b0cf 100644
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -121,6 +121,54 @@ util_format_write_4ub(enum pipe_format format, const uint8_t *src, unsigned src_
boolean
+util_is_format_compatible(const struct util_format_description *src_desc,
+ const struct util_format_description *dst_desc)
+{ /* Returns TRUE only if every check below passes; the first mismatch returns FALSE. */
+ unsigned chan;
+
+ if (src_desc->format == dst_desc->format) { /* identical formats: accepted before any other check */
+ return TRUE;
+ }
+
+ if (src_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || /* both descriptions must use the PLAIN layout */
+ dst_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ return FALSE;
+ }
+
+ if (src_desc->block.bits != dst_desc->block.bits || /* block bit size, channel count and colorspace must all match */
+ src_desc->nr_channels != dst_desc->nr_channels ||
+ src_desc->colorspace != dst_desc->colorspace) {
+ return FALSE;
+ }
+
+ for (chan = 0; chan < 4; ++chan) { /* all four channel slots are compared, regardless of nr_channels */
+ if (src_desc->channel[chan].size !=
+ dst_desc->channel[chan].size) {
+ return FALSE;
+ }
+ }
+
+ for (chan = 0; chan < 4; ++chan) { /* walk the destination's four swizzle slots */
+ enum util_format_swizzle swizzle = dst_desc->swizzle[chan];
+
+ if (swizzle < 4) { /* dst slots with swizzle >= 4 are not compared against src at all */
+ if (src_desc->swizzle[chan] != swizzle) { /* src must select the same channel index in this slot */
+ return FALSE;
+ }
+ if ((src_desc->channel[swizzle].type != /* the selected channel must agree in type and normalized flag */
+ dst_desc->channel[swizzle].type) ||
+ (src_desc->channel[swizzle].normalized !=
+ dst_desc->channel[swizzle].normalized)) {
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+
+boolean
util_format_fits_8unorm(const struct util_format_description *format_desc)
{
unsigned chan;
@@ -193,7 +241,10 @@ util_format_translate(enum pipe_format dst_format,
unsigned dst_step;
unsigned src_step;
- if (dst_format == src_format) {
+ dst_format_desc = util_format_description(dst_format);
+ src_format_desc = util_format_description(src_format);
+
+ if (util_is_format_compatible(src_format_desc, dst_format_desc)) {
/*
* Trivial case.
*/
@@ -204,9 +255,6 @@ util_format_translate(enum pipe_format dst_format,
return;
}
- dst_format_desc = util_format_description(dst_format);
- src_format_desc = util_format_description(src_format);
-
assert(dst_x % dst_format_desc->block.width == 0);
assert(dst_y % dst_format_desc->block.height == 0);
assert(src_x % src_format_desc->block.width == 0);
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 8e786a390a..03b73c0e98 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -440,6 +440,48 @@ util_format_is_depth_and_stencil(enum pipe_format format)
desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE) ? TRUE : FALSE;
}
+
+/**
+ * Give the RGBA colormask of the channels that can be represented in this
+ * format: bit N is set when desc->swizzle[N] < 4.
+ *
+ * That is, the channels whose values are preserved. ZS formats yield 0.
+ */
+static INLINE unsigned
+util_format_colormask(const struct util_format_description *desc)
+{
+ unsigned colormask;
+ unsigned chan;
+
+ switch (desc->colorspace) {
+ case UTIL_FORMAT_COLORSPACE_RGB:
+ case UTIL_FORMAT_COLORSPACE_SRGB:
+ case UTIL_FORMAT_COLORSPACE_YUV:
+ colormask = 0; /* RGB, SRGB and YUV all share the swizzle-driven mask below */
+ for (chan = 0; chan < 4; ++chan) {
+ if (desc->swizzle[chan] < 4) { /* slot selects one of channels 0..3 */
+ colormask |= (1 << chan);
+ }
+ }
+ return colormask;
+ case UTIL_FORMAT_COLORSPACE_ZS:
+ return 0; /* ZS colorspace: empty mask */
+ default:
+ assert(0); /* unexpected colorspace value */
+ return 0; /* reached only if the assert above does not stop execution */
+ }
+}
+
+
+/**
+ * Whether the src format can be blitted to destination format with a simple
+ * memcpy.
+ */
+boolean
+util_is_format_compatible(const struct util_format_description *src_desc,
+ const struct util_format_description *dst_desc);
+
+
/**
* Whether this format is a rgab8 variant.
*
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index 78473bf35a..6ed39561fb 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -399,7 +399,6 @@ static INLINE boolean util_get_offset(
}
}
-
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h
index 42c40b2aa7..81ffc9fb27 100644
--- a/src/gallium/auxiliary/util/u_linear.h
+++ b/src/gallium/auxiliary/util/u_linear.h
@@ -33,6 +33,7 @@
#ifndef U_LINEAR_H
#define U_LINEAR_H
+#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
struct u_linear_format_block