summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/Makefile1
-rw-r--r--src/gallium/auxiliary/SConscript1
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c174
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h17
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_line.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c48
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c10
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c9
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c62
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h13
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c25
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c23
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c26
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.c75
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.h10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c7
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c12
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.h3
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c59
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c125
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c226
-rw-r--r--src/gallium/auxiliary/os/os_thread.h9
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt36
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c81
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c19
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c16
-rw-r--r--src/gallium/auxiliary/translate/translate.h3
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c19
-rw-r--r--src/gallium/auxiliary/translate/translate_sse.c10
-rw-r--r--src/gallium/auxiliary/util/u_caps.c245
-rw-r--r--src/gallium/auxiliary/util/u_caps.h67
-rw-r--r--src/gallium/auxiliary/util/u_debug.c42
-rw-r--r--src/gallium/auxiliary/util/u_debug.h39
-rw-r--r--src/gallium/auxiliary/util/u_dump_state.c4
-rw-r--r--src/gallium/auxiliary/util/u_format.h12
-rw-r--r--src/gallium/auxiliary/util/u_format_s3tc.c361
-rw-r--r--src/gallium/auxiliary/util/u_surfaces.c8
-rw-r--r--src/gallium/auxiliary/util/u_tile.c4
53 files changed, 1306 insertions, 667 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index f8e65cf6c6..2daed382cf 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -101,6 +101,7 @@ C_SOURCES = \
util/u_blit.c \
util/u_blitter.c \
util/u_cache.c \
+ util/u_caps.c \
util/u_cpu_detect.c \
util/u_dl.c \
util/u_draw_quad.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index db3a1e7311..a0673df8a8 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -144,6 +144,7 @@ source = [
'util/u_blit.c',
'util/u_blitter.c',
'util/u_cache.c',
+ 'util/u_caps.c',
'util/u_cpu_detect.c',
'util/u_debug.c',
'util/u_debug_memory.c',
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 6fd4bd3642..c5fe7efa02 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -313,10 +313,13 @@ void cso_release_all( struct cso_context *ctx )
}
+/**
+ * Free the CSO context. NOTE: the state tracker should have previously called
+ * cso_release_all().
+ */
void cso_destroy_context( struct cso_context *ctx )
{
if (ctx) {
- /*cso_release_all( ctx );*/
FREE( ctx );
}
}
@@ -349,6 +352,7 @@ enum pipe_error cso_set_blend(struct cso_context *ctx,
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
+ memset(&cso->state, 0, sizeof cso->state);
memcpy(&cso->state, templ, key_size);
cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state;
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 9d11031769..2c234285b5 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -24,6 +24,8 @@
/* generates the draw jit function */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
static void
init_globals(struct draw_llvm *llvm)
@@ -205,7 +207,9 @@ draw_llvm_create(struct draw_context *draw)
void
draw_llvm_destroy(struct draw_llvm *llvm)
{
- free(llvm);
+ LLVMDisposePassManager(llvm->pass);
+
+ FREE(llvm);
}
struct draw_llvm_variant *
@@ -218,6 +222,7 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs)
llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
draw_llvm_generate(llvm, variant);
+ draw_llvm_generate_elts(llvm, variant);
return variant;
}
@@ -252,7 +257,8 @@ generate_vs(struct draw_llvm *llvm,
NULL /*pos*/,
inputs,
outputs,
- NULL/*sampler*/);
+ NULL/*sampler*/,
+ &llvm->draw->vs.vertex_shader->info);
}
#if DEBUG_STORE
@@ -285,10 +291,16 @@ generate_fetch(LLVMBuilderRef builder,
LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
&indices, 1, "");
LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
+ LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
- LLVMValueRef stride = LLVMBuildMul(builder,
- vb_stride,
- index, "");
+ LLVMValueRef cond;
+ LLVMValueRef stride;
+
+ cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
+
+ index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
+
+ stride = LLVMBuildMul(builder, vb_stride, index, "");
vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
@@ -689,6 +701,158 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
lp_disassemble(variant->jit_func);
}
+
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
+{
+ LLVMTypeRef arg_types[7];
+ LLVMTypeRef func_type;
+ LLVMValueRef context_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
+ LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ struct draw_context *draw = llvm->draw;
+ unsigned i, j;
+ struct lp_build_context bld;
+ struct lp_build_context bld_int;
+ struct lp_build_loop_state lp_loop;
+ struct lp_type vs_type = lp_type_float_vec(32);
+ const int max_vertices = 4;
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+ LLVMValueRef fetch_max;
+
+ arg_types[0] = llvm->context_ptr_type; /* context */
+ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
+ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
+ arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
+ arg_types[4] = LLVMInt32Type(); /* fetch_count */
+ arg_types[5] = LLVMInt32Type(); /* stride */
+ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+
+ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+ variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
+ LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
+ for(i = 0; i < Elements(arg_types); ++i)
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
+
+ context_ptr = LLVMGetParam(variant->function_elts, 0);
+ io_ptr = LLVMGetParam(variant->function_elts, 1);
+ vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
+ fetch_elts = LLVMGetParam(variant->function_elts, 3);
+ fetch_count = LLVMGetParam(variant->function_elts, 4);
+ stride = LLVMGetParam(variant->function_elts, 5);
+ vb_ptr = LLVMGetParam(variant->function_elts, 6);
+
+ lp_build_name(context_ptr, "context");
+ lp_build_name(io_ptr, "io");
+ lp_build_name(vbuffers_ptr, "vbuffers");
+ lp_build_name(fetch_elts, "fetch_elts");
+ lp_build_name(fetch_count, "fetch_count");
+ lp_build_name(stride, "stride");
+ lp_build_name(vb_ptr, "vb");
+
+ /*
+ * Function body
+ */
+
+ block = LLVMAppendBasicBlock(variant->function_elts, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ lp_build_context_init(&bld, builder, vs_type);
+ lp_build_context_init(&bld_int, builder, lp_type_int(32));
+
+ step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+
+ fetch_max = LLVMBuildSub(builder, fetch_count,
+ LLVMConstInt(LLVMInt32Type(), 1, 0),
+ "fetch_max");
+
+ lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
+ {
+ LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
+ LLVMValueRef io;
+ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
+
+ io_itr = lp_loop.counter;
+ io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
+#if DEBUG_STORE
+ lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
+ io_itr, io, lp_loop.counter);
+#endif
+ for (i = 0; i < NUM_CHANNELS; ++i) {
+ LLVMValueRef true_index = LLVMBuildAdd(
+ builder,
+ lp_loop.counter,
+ LLVMConstInt(LLVMInt32Type(), i, 0), "");
+ LLVMValueRef fetch_ptr;
+
+ /* make sure we're not out of bounds which can happen
+ * if fetch_count % 4 != 0, because on the last iteration
+ * a few of the 4 vertex fetches will be out of bounds */
+ true_index = lp_build_min(&bld_int, true_index, fetch_max);
+
+ fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+ &true_index, 1, "");
+ true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
+ for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
+ struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
+ LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
+ velem->vertex_buffer_index,
+ 0);
+ LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
+ &vb_index, 1, "");
+ generate_fetch(builder, vbuffers_ptr,
+ &aos_attribs[j][i], velem, vb, true_index);
+ }
+ }
+ convert_to_soa(builder, aos_attribs, inputs,
+ draw->pt.nr_vertex_elements);
+
+ ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
+ generate_vs(llvm,
+ builder,
+ outputs,
+ ptr_aos,
+ context_ptr);
+
+ convert_to_aos(builder, io, outputs,
+ draw->vs.vertex_shader->info.num_outputs,
+ max_vertices);
+ }
+ lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
+
+ LLVMBuildRetVoid(builder);
+
+ LLVMDisposeBuilder(builder);
+
+ /*
+ * Translate the LLVM IR into machine code.
+ */
+#ifdef DEBUG
+ if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
+ LLVMDumpValue(variant->function_elts);
+ assert(0);
+ }
+#endif
+
+ LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
+
+ if (0) {
+ LLVMDumpValue(variant->function_elts);
+ debug_printf("\n");
+ }
+ variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal(
+ llvm->draw->engine, variant->function_elts);
+
+ if (0)
+ lp_disassemble(variant->jit_func_elts);
+}
+
void
draw_llvm_make_variant_key(struct draw_llvm *llvm,
struct draw_llvm_variant_key *key)
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 28b9044a81..58fee7f9d6 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -78,7 +78,10 @@ struct draw_jit_context
#define draw_jit_vbuffer_stride(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 0, "stride")
-#define draw_jit_vbuffer_offset(_builder, _ptr) \
+#define draw_jit_vbuffer_max_index(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, 1, "max_index")
+
+#define draw_jit_vbuffer_offset(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 2, "buffer_offset")
@@ -91,6 +94,16 @@ typedef void
unsigned stride,
struct pipe_vertex_buffer *vertex_buffers);
+
+typedef void
+(*draw_jit_vert_func_elts)(struct draw_jit_context *context,
+ struct vertex_header *io,
+ const char *vbuffers[PIPE_MAX_ATTRIBS],
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ unsigned stride,
+ struct pipe_vertex_buffer *vertex_buffers);
+
struct draw_llvm {
struct draw_context *draw;
@@ -119,7 +132,9 @@ struct draw_llvm_variant
{
struct draw_llvm_variant_key key;
LLVMValueRef function;
+ LLVMValueRef function_elts;
draw_jit_vert_func jit_func;
+ draw_jit_vert_func_elts jit_func_elts;
struct draw_llvm_variant *next;
};
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index ee2b811603..abbf6247ab 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -137,7 +137,7 @@ emit_vertex( struct vbuf_stage *vbuf,
*/
/* Note: we really do want data[0] here, not data[pos]:
*/
- vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
+ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0, ~0);
vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr);
if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
@@ -271,7 +271,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
translate_key_sanitize(&hw_key);
vbuf->translate = translate_cache_find(vbuf->cache, &hw_key);
- vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0);
+ vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0, ~0);
}
vbuf->point_size = vbuf->stage.draw->rasterizer->point_size;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index 265a420d01..ab16706581 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -54,7 +54,6 @@ static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage )
/**
* Draw a wide line by drawing a quad (two triangles).
- * XXX need to disable polygon stipple.
*/
static void wideline_line( struct draw_stage *stage,
struct prim_header *header )
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 4bb3282f62..a2bfb693c0 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -132,6 +132,7 @@ struct draw_context
struct draw_pt_middle_end *fetch_emit;
struct draw_pt_middle_end *fetch_shade_emit;
struct draw_pt_middle_end *general;
+ struct draw_pt_middle_end *llvm;
} middle;
struct {
@@ -253,8 +254,8 @@ struct draw_context
#ifdef HAVE_LLVM
LLVMExecutionEngineRef engine;
- boolean use_llvm;
#endif
+
void *driver_private;
};
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index b5876bb1bd..b853f3a89f 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -37,6 +37,13 @@
#include "util/u_math.h"
#include "util/u_prim.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
+#ifdef HAVE_LLVM
+DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
+#endif
+
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
if (count < first)
@@ -90,12 +97,16 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_SHADE;
}
- if (opt == 0)
- middle = draw->pt.middle.fetch_emit;
- else if (opt == PT_SHADE && !draw->pt.no_fse)
- middle = draw->pt.middle.fetch_shade_emit;
- else
- middle = draw->pt.middle.general;
+ if (draw->pt.middle.llvm) {
+ middle = draw->pt.middle.llvm;
+ } else {
+ if (opt == 0)
+ middle = draw->pt.middle.fetch_emit;
+ else if (opt == PT_SHADE && !draw->pt.no_fse)
+ middle = draw->pt.middle.fetch_shade_emit;
+ else
+ middle = draw->pt.middle.general;
+ }
/* Pick the right frontend
@@ -122,8 +133,8 @@ draw_pt_arrays(struct draw_context *draw,
boolean draw_pt_init( struct draw_context *draw )
{
- draw->pt.test_fse = debug_get_bool_option("DRAW_FSE", FALSE);
- draw->pt.no_fse = debug_get_bool_option("DRAW_NO_FSE", FALSE);
+ draw->pt.test_fse = debug_get_option_draw_fse();
+ draw->pt.no_fse = debug_get_option_draw_no_fse();
draw->pt.front.vcache = draw_pt_vcache( draw );
if (!draw->pt.front.vcache)
@@ -141,25 +152,26 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_shade_emit)
return FALSE;
-#if HAVE_LLVM
- draw->use_llvm = debug_get_bool_option("DRAW_USE_LLVM", TRUE);
- if (draw->use_llvm)
- draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit_llvm( draw );
-#else
- draw->pt.middle.general = NULL;
-#endif
-
- if (!draw->pt.middle.general)
- draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
+ draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
if (!draw->pt.middle.general)
return FALSE;
+#if HAVE_LLVM
+ if (debug_get_option_draw_use_llvm())
+ draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw );
+#endif
+
return TRUE;
}
void draw_pt_destroy( struct draw_context *draw )
{
+ if (draw->pt.middle.llvm) {
+ draw->pt.middle.llvm->destroy( draw->pt.middle.llvm );
+ draw->pt.middle.llvm = NULL;
+ }
+
if (draw->pt.middle.general) {
draw->pt.middle.general->destroy( draw->pt.middle.general );
draw->pt.middle.general = NULL;
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index a7917f54b0..ad48fa39a4 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -171,12 +171,14 @@ void draw_pt_emit( struct pt_emit *emit,
translate->set_buffer(translate,
0,
vertex_data,
- stride );
+ stride,
+ ~0);
translate->set_buffer(translate,
1,
&draw->rasterizer->point_size,
- 0);
+ 0,
+ ~0);
translate->run( translate,
0,
@@ -232,11 +234,11 @@ void draw_pt_emit_linear(struct pt_emit *emit,
goto fail;
translate->set_buffer(translate, 0,
- vertex_data, stride);
+ vertex_data, stride, count - 1);
translate->set_buffer(translate, 1,
&draw->rasterizer->point_size,
- 0);
+ 0, ~0);
translate->run(translate,
0,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 252be5053e..a1347221b5 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -149,7 +149,8 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
fetch->translate->set_buffer(fetch->translate,
draw->pt.nr_vertex_buffers,
&vh,
- 0);
+ 0,
+ ~0);
}
}
@@ -172,7 +173,8 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].stride );
+ draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index);
}
translate->run_elts( translate,
@@ -198,7 +200,8 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].stride );
+ draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index);
}
translate->run( translate,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 1994ddf2bc..d7735bf1ac 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -168,7 +168,8 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
feme->translate->set_buffer(feme->translate,
draw->pt.nr_vertex_buffers,
&feme->point_size,
- 0);
+ 0,
+ ~0);
}
feme->point_size = draw->rasterizer->point_size;
@@ -178,7 +179,8 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].stride );
+ draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index);
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 389e2b105e..cbb5b6c960 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -167,7 +167,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
i,
((const ubyte *) draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].stride );
+ draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index f71271bd91..35913a5995 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -167,8 +167,6 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
- struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align( fetch_count, 4 );
@@ -182,35 +180,13 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
return;
}
- /* Fetch into our vertex buffer
- */
- draw_pt_fetch_run( fpme->fetch,
- fetch_elts,
- fetch_count,
- (char *)pipeline_verts );
-
- /* Run the shader, note that this overwrites the data[] parts of
- * the pipeline verts. If there is no shader, eg if
- * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
- * already in the correct place.*/
- if (opt & PT_SHADE)
- {
- vshader->run_linear(vshader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.vs_constants,
- fetch_count,
- fpme->vertex_size,
- fpme->vertex_size);
- if (gshader)
- draw_geometry_shader_run(gshader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.gs_constants,
- fetch_count,
- fpme->vertex_size,
- fpme->vertex_size);
- }
+ fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
+ pipeline_verts,
+ (const char **)draw->pt.user.vbuffer,
+ fetch_elts,
+ fetch_count,
+ fpme->vertex_size,
+ draw->pt.vertex_buffer );
if (draw_pt_post_vs_run( fpme->post_vs,
pipeline_verts,
@@ -373,7 +349,31 @@ static void llvm_middle_end_finish( struct draw_pt_middle_end *middle )
static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_llvm_variant *variant = NULL;
+ variant = fpme->variants;
+ while(variant) {
+ struct draw_llvm_variant *next = variant->next;
+
+ if (variant->function_elts) {
+ if (variant->function_elts)
+ LLVMFreeMachineCodeForFunction(draw->engine,
+ variant->function_elts);
+ LLVMDeleteFunction(variant->function_elts);
+ }
+
+ if (variant->function) {
+ if (variant->function)
+ LLVMFreeMachineCodeForFunction(draw->engine,
+ variant->function);
+ LLVMDeleteFunction(variant->function);
+ }
+
+ FREE(variant);
+
+ variant = next;
+ }
if (fpme->fetch)
draw_pt_fetch_destroy( fpme->fetch );
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index cfd5154024..c2832eefa2 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -46,7 +46,7 @@
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
-
+DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE)
void
draw_vs_set_constants(struct draw_context *draw,
@@ -157,7 +157,7 @@ draw_delete_vertex_shader(struct draw_context *draw,
boolean
draw_vs_init( struct draw_context *draw )
{
- draw->dump_vs = debug_get_bool_option("GALLIUM_DUMP_VS", FALSE);
+ draw->dump_vs = debug_get_option_gallium_dump_vs();
draw->vs.machine = tgsi_exec_machine_create();
if (!draw->vs.machine)
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index f49332352b..6c7e94db43 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -80,7 +80,8 @@ struct draw_vs_varient {
void (*set_buffer)( struct draw_vs_varient *,
unsigned i,
const void *ptr,
- unsigned stride );
+ unsigned stride,
+ unsigned max_stride );
void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
unsigned start,
@@ -168,8 +169,9 @@ draw_create_vs_ppc(struct draw_context *draw,
struct draw_vs_varient_key;
struct draw_vertex_shader;
-struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key );
+struct draw_vs_varient *
+draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
@@ -187,8 +189,9 @@ struct translate *draw_vs_get_fetch( struct draw_context *draw,
struct translate *draw_vs_get_emit( struct draw_context *draw,
struct translate_key *key );
-struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key );
+struct draw_vs_varient *
+draw_vs_create_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index e7121f3654..19f49e34c8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -2089,13 +2089,21 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
}
+/** cast wrapper */
+static INLINE struct draw_vs_varient_aos_sse *
+draw_vs_varient_aos_sse(struct draw_vs_varient *varient)
+{
+ return (struct draw_vs_varient_aos_sse *) varient;
+}
+
static void vaos_set_buffer( struct draw_vs_varient *varient,
unsigned buf,
const void *ptr,
- unsigned stride )
+ unsigned stride,
+ unsigned max_stride)
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
if (buf < vaos->nr_vb) {
vaos->buffer[buf].base_ptr = (char *)ptr;
@@ -2112,7 +2120,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
unsigned count,
void *output_buffer )
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
struct aos_machine *machine = vaos->draw->vs.aos_machine;
unsigned i;
@@ -2136,7 +2144,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
unsigned count,
void *output_buffer )
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
struct aos_machine *machine = vaos->draw->vs.aos_machine;
unsigned i;
@@ -2165,7 +2173,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
static void vaos_destroy( struct draw_vs_varient *varient )
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
FREE( vaos->buffer );
@@ -2241,13 +2249,14 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
}
-struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+struct draw_vs_varient *
+draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
{
struct draw_vs_varient *varient = varient_aos_sse( vs, key );
if (varient == NULL) {
- varient = draw_vs_varient_generic( vs, key );
+ varient = draw_vs_create_varient_generic( vs, key );
}
return varient;
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 7deca2b69d..bc34d390da 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -203,7 +203,7 @@ draw_create_vs_exec(struct draw_context *draw,
vs->base.prepare = vs_exec_prepare;
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->machine = draw->vs.machine;
return &vs->base;
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index d869eecec5..5df84916c5 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
*/
shader->func(inputs_soa, outputs_soa, temps_soa,
(float (*)[4]) shader->base.immediates,
- (const float (*)[4])constants[0],
+ (float (*)[4])constants[0],
ppc_builtin_constants);
/* convert (up to) four output verts from SoA back to AoS format */
@@ -190,7 +190,7 @@ draw_create_vs_ppc(struct draw_context *draw,
vs->base.create_varient = draw_vs_varient_aos_ppc;
else
#endif
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->base.prepare = vs_ppc_prepare;
vs->base.run_linear = vs_ppc_run_linear;
vs->base.delete = vs_ppc_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 54e6423388..14c95082a9 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -165,9 +165,9 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.draw = draw;
if (1)
- vs->base.create_varient = draw_vs_varient_aos_sse;
+ vs->base.create_varient = draw_vs_create_varient_aos_sse;
else
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 5ed706cb4f..6eb26927f2 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -66,14 +66,16 @@ struct draw_vs_varient_generic {
static void vsvg_set_buffer( struct draw_vs_varient *varient,
unsigned buffer,
const void *ptr,
- unsigned stride )
+ unsigned stride,
+ unsigned max_index )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
vsvg->fetch->set_buffer(vsvg->fetch,
buffer,
ptr,
- stride);
+ stride,
+ max_index );
}
@@ -172,12 +174,14 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->emit->set_buffer( vsvg->emit,
0,
temp_buffer,
- temp_vertex_stride );
+ temp_vertex_stride,
+ ~0 );
vsvg->emit->set_buffer( vsvg->emit,
1,
&vsvg->draw->rasterizer->point_size,
- 0);
+ 0,
+ ~0 );
vsvg->emit->run( vsvg->emit,
0, count,
@@ -232,12 +236,14 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->emit->set_buffer( vsvg->emit,
0,
temp_buffer,
- temp_vertex_stride );
+ temp_vertex_stride,
+ ~0 );
vsvg->emit->set_buffer( vsvg->emit,
1,
&vsvg->draw->rasterizer->point_size,
- 0);
+ 0,
+ ~0 );
vsvg->emit->run( vsvg->emit,
0, count,
@@ -257,8 +263,9 @@ static void vsvg_destroy( struct draw_vs_varient *varient )
}
-struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+struct draw_vs_varient *
+draw_vs_create_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
{
unsigned i;
struct translate_key fetch, emit;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 8e8fcccf56..20ae958714 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1210,6 +1210,14 @@ LLVMValueRef
lp_build_cos(struct lp_build_context *bld,
LLVMValueRef a)
{
+#ifdef PIPE_OS_WINDOWS
+ /*
+ * FIXME: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf()
+ * which is neither efficient nor does the CRT linkage work on Windows
+ * causing segmentation fault. So simply disable the code for now.
+ */
+ return bld->one;
+#else
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -1220,6 +1228,7 @@ lp_build_cos(struct lp_build_context *bld,
util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width);
return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
+#endif
}
@@ -1230,6 +1239,14 @@ LLVMValueRef
lp_build_sin(struct lp_build_context *bld,
LLVMValueRef a)
{
+#ifdef PIPE_OS_WINDOWS
+ /*
+ * FIXME: X86 backend translates llvm.sin.v4f32 to 4 calls to CRT's sinf()
+ * which is neither efficient nor does the CRT linkage work on Windows
+ * causing segmentation fault. So simply disable the code for now.
+ */
+ return bld->zero;
+#else
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -1240,6 +1257,7 @@ lp_build_sin(struct lp_build_context *bld,
util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width);
return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
+#endif
}
@@ -1502,8 +1520,10 @@ lp_build_log2_approx(struct lp_build_context *bld,
res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
}
- if(p_exp)
+ if(p_exp) {
+ exp = LLVMBuildBitCast(bld->builder, exp, vec_type, "");
*p_exp = exp;
+ }
if(p_floor_log2)
*p_floor_log2 = logexp;
@@ -1573,8 +1593,10 @@ lp_build_float_log2_approx(struct lp_build_context *bld,
res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
}
- if(p_exp)
+ if(p_exp) {
+ exp = LLVMBuildBitCast(bld->builder, exp, float_type, "");
*p_exp = exp;
+ }
if(p_floor_log2)
*p_floor_log2 = logexp;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index e60ab4f6ba..8f15b1d287 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -792,3 +792,78 @@ lp_build_endif(struct lp_build_if_state *ctx)
/* Resume building code at end of the ifthen->merge_block */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
}
+
+
+/**
+ * Allocate a scalar (or vector) variable.
+ *
+ * Although not strictly part of control flow, control flow has deep impact in
+ * how variables should be allocated.
+ *
+ * The mem2reg optimization pass is the recommended way to dealing with mutable
+ * variables, and SSA. It looks for allocas and if it can handle them, it
+ * promotes them, but only looks for alloca instructions in the entry block of
+ * the function. Being in the entry block guarantees that the alloca is only
+ * executed once, which makes analysis simpler.
+ *
+ * See also:
+ * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory
+ */
+LLVMValueRef
+lp_build_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ const char *name)
+{
+ LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+ LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
+ LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
+ LLVMBuilderRef first_builder = LLVMCreateBuilder();
+ LLVMValueRef res;
+
+ LLVMPositionBuilderAtEnd(first_builder, first_block);
+ LLVMPositionBuilderBefore(first_builder, first_instr);
+
+ res = LLVMBuildAlloca(first_builder, type, name);
+
+ LLVMDisposeBuilder(first_builder);
+
+ return res;
+}
+
+
+/**
+ * Allocate an array of scalars/vectors.
+ *
+ * mem2reg pass is not capable of promoting structs or arrays to registers, but
+ * we still put it in the first block anyway as failure to put allocas in the
+ * first block may prevent the X86 backend from successfully align the stack as
+ * required.
+ *
+ * Also the scalarrepl pass is supossedly more powerful and can promote
+ * arrays in many cases.
+ *
+ * See also:
+ * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory
+ */
+LLVMValueRef
+lp_build_array_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ LLVMValueRef count,
+ const char *name)
+{
+ LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+ LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
+ LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
+ LLVMBuilderRef first_builder = LLVMCreateBuilder();
+ LLVMValueRef res;
+
+ LLVMPositionBuilderBefore(first_builder, first_instr);
+
+ res = LLVMBuildArrayAlloca(first_builder, type, count, name);
+
+ LLVMDisposeBuilder(first_builder);
+
+ return res;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
index 745838570c..fffb493a93 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -156,5 +156,15 @@ lp_build_endif(struct lp_build_if_state *ctx);
LLVMBasicBlockRef
lp_build_insert_new_block(LLVMBuilderRef builder, const char *name);
+LLVMValueRef
+lp_build_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ const char *name);
+
+LLVMValueRef
+lp_build_array_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ LLVMValueRef count,
+ const char *name);
#endif /* !LP_BLD_FLOW_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index a5a019fa92..6257e9a404 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -40,6 +40,7 @@
#include "lp_bld_init.h"
#include "lp_bld_type.h"
+#include "lp_bld_flow.h"
#include "lp_bld_format.h"
@@ -370,11 +371,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
}
- /*
- * XXX: this should better go to the first block in the function
- */
-
- tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+ tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
/*
* Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index de07c222a3..5067d0a164 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -27,6 +27,7 @@
#include "pipe/p_compiler.h"
+#include "util/u_cpu_detect.h"
#include "util/u_debug.h"
#include "lp_bld_init.h"
@@ -62,6 +63,15 @@ lp_build_init(void)
if (!lp_build_target)
lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine);
+
+ util_cpu_detect();
+
+#if 0
+ /* For simulating less capable machines */
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_ssse3 = 0;
+ util_cpu_caps.has_sse4_1 = 0;
+#endif
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index a3b6970116..d13fa1a5d0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -472,18 +472,6 @@ lp_build_select_aos(struct lp_build_context *bld,
}
}
-LLVMValueRef
-lp_build_alloca(struct lp_build_context *bld)
-{
- const struct lp_type type = bld->type;
-
- if (type.length > 1) { /*vector*/
- return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), "");
- } else { /*scalar*/
- return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), "");
- }
-}
-
/** Return (a & ~b) */
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index 00a8c75019..29f9fc3b20 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -76,9 +76,6 @@ lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef b,
const boolean cond[4]);
-LLVMValueRef
-lp_build_alloca(struct lp_build_context *bld);
-
LLVMValueRef
lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index eb75b9b393..195a4953ab 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -77,6 +77,11 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
*/
state->format = view->format;
+ state->swizzle_r = view->swizzle_r;
+ state->swizzle_g = view->swizzle_g;
+ state->swizzle_b = view->swizzle_b;
+ state->swizzle_a = view->swizzle_a;
+
state->target = texture->target;
state->pot_width = util_is_pot(texture->width0);
state->pot_height = util_is_pot(texture->height0);
@@ -181,54 +186,16 @@ lp_build_sample_offset(struct lp_build_context *bld,
LLVMValueRef offset;
x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
+ offset = lp_build_mul(bld, x, x_stride);
- if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- LLVMValueRef x_lo, x_hi;
- LLVMValueRef y_lo, y_hi;
- LLVMValueRef x_stride_lo, x_stride_hi;
- LLVMValueRef y_stride_lo, y_stride_hi;
- LLVMValueRef x_offset_lo, x_offset_hi;
- LLVMValueRef y_offset_lo, y_offset_hi;
- LLVMValueRef offset_lo, offset_hi;
-
- /* XXX 1D & 3D addressing not done yet */
- assert(!z);
- assert(!z_stride);
-
- x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
- y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
-
- x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
- y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");
-
- x_stride_lo = x_stride;
- y_stride_lo = lp_build_const_vec(bld->type, 2*format_desc->block.bits/8);
-
- x_stride_hi = lp_build_const_vec(bld->type, 4*format_desc->block.bits/8);
- y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");
-
- x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
- y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
- offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);
-
- x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
- y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
- offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);
-
- offset = lp_build_add(bld, offset_hi, offset_lo);
+ if (y && y_stride) {
+ LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
+ offset = lp_build_add(bld, offset, y_offset);
}
- else {
- offset = lp_build_mul(bld, x, x_stride);
-
- if (y && y_stride) {
- LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
- offset = lp_build_add(bld, offset, y_offset);
- }
-
- if (z && z_stride) {
- LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
- offset = lp_build_add(bld, offset, z_offset);
- }
+
+ if (z && z_stride) {
+ LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
+ offset = lp_build_add(bld, offset, z_offset);
}
return offset;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index e287376385..8ceb20473d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -54,8 +54,14 @@ struct lp_build_context;
*/
struct lp_sampler_static_state
{
- /* pipe_texture's state */
+ /* pipe_sampler_view's state */
enum pipe_format format;
+ unsigned swizzle_r:3;
+ unsigned swizzle_g:3;
+ unsigned swizzle_b:3;
+ unsigned swizzle_a:3;
+
+ /* pipe_texture's state */
unsigned target:3;
unsigned pot_width:1;
unsigned pot_height:1;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index c9b613e21c..54c0ad7ce4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -185,6 +185,53 @@ texture_dims(enum pipe_texture_target tex)
}
+static LLVMValueRef
+lp_build_swizzle_chan_soa(struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ enum util_format_swizzle swizzle)
+{
+ switch (swizzle) {
+ case PIPE_SWIZZLE_RED:
+ case PIPE_SWIZZLE_GREEN:
+ case PIPE_SWIZZLE_BLUE:
+ case PIPE_SWIZZLE_ALPHA:
+ return unswizzled[swizzle];
+ case PIPE_SWIZZLE_ZERO:
+ return lp_build_zero(type);
+ case PIPE_SWIZZLE_ONE:
+ return lp_build_one(type);
+ default:
+ assert(0);
+ return lp_build_undef(type);
+ }
+}
+
+
+static void
+lp_build_swizzle_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef unswizzled[4];
+ unsigned char swizzles[4];
+ unsigned chan;
+
+ for (chan = 0; chan < 4; ++chan) {
+ unswizzled[chan] = texel[chan];
+ }
+
+ swizzles[0] = bld->static_state->swizzle_r;
+ swizzles[1] = bld->static_state->swizzle_g;
+ swizzles[2] = bld->static_state->swizzle_b;
+ swizzles[3] = bld->static_state->swizzle_a;
+
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned swizzle = swizzles[chan];
+ texel[chan] = lp_build_swizzle_chan_soa(bld->texel_type,
+ unswizzled, swizzle);
+ }
+}
+
+
/**
* Generate code to fetch a texel from a texture at int coords (x, y, z).
@@ -278,6 +325,18 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
bld->format_desc,
x, y, z, y_stride, z_stride);
+ if (use_border) {
+ /* If we can sample the border color, it means that texcoords may
+ * lie outside the bounds of the texture image. We need to do
+ * something to prevent reading out of bounds and causing a segfault.
+ *
+ * Simply AND the texture coords with !use_border. This will cause
+ * coords which are out of bounds to become zero. Zero's guaranteed
+ * to be inside the texture image.
+ */
+ offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
+ }
+
lp_build_fetch_rgba_soa(bld->builder,
bld->format_desc,
bld->texel_type,
@@ -285,6 +344,8 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
i, j,
texel);
+ lp_build_swizzle_soa(bld, texel);
+
/*
* Note: if we find an app which frequently samples the texture border
* we might want to implement a true conditional here to avoid sampling
@@ -842,6 +903,7 @@ lp_build_minify(struct lp_build_sample_context *bld,
* \param s vector of texcoord s values
* \param t vector of texcoord t values
* \param r vector of texcoord r values
+ * \param shader_lod_bias vector float with the shader lod bias,
* \param width scalar int texture width
* \param height scalar int texture height
* \param depth scalar int texture depth
@@ -851,6 +913,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
+ LLVMValueRef shader_lod_bias,
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth)
@@ -865,8 +928,8 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
else {
const int dims = texture_dims(bld->static_state->target);
struct lp_build_context *float_bld = &bld->float_bld;
- LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(),
- bld->static_state->lod_bias);
+ LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->lod_bias);
LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
bld->static_state->min_lod);
LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
@@ -940,8 +1003,14 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
/* compute lod = log2(rho) */
lod = lp_build_log2(float_bld, rho);
- /* add lod bias */
- lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+ /* add sampler lod bias */
+ lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias");
+
+ /* add shader lod bias */
+ /* XXX for now we take only the first element since our lod is scalar */
+ shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias,
+ LLVMConstInt(LLVMInt32Type(), 0, 0), "");
+ lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias");
/* clamp lod */
lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
@@ -1527,6 +1596,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
+ LLVMValueRef lodbias,
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth,
@@ -1564,7 +1634,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
+ lod = lp_build_lod_selector(bld, s, t, r, lodbias, width, height, depth);
}
/*
@@ -1772,6 +1842,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef unswizzled[4];
LLVMValueRef stride;
+ assert(bld->static_state->target == PIPE_TEXTURE_2D);
+ assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
+ assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
+ assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
+
lp_build_context_init(&i32, builder, lp_type_int_vec(32));
lp_build_context_init(&h16, builder, lp_type_ufixed(16));
lp_build_context_init(&u8n, builder, lp_type_unorm(8));
@@ -1945,6 +2020,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
lp_build_format_swizzle_soa(bld->format_desc,
bld->texel_type, unswizzled,
texel);
+
+ lp_build_swizzle_soa(bld, texel);
}
@@ -1984,6 +2061,24 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
/**
+ * Just set texels to white instead of actually sampling the texture.
+ * For debugging.
+ */
+static void
+lp_build_sample_nop(struct lp_build_sample_context *bld,
+ LLVMValueRef *texel)
+{
+ struct lp_build_context *texel_bld = &bld->texel_bld;
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+ /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/
+ texel[chan] = texel_bld->one;
+ }
+}
+
+
+/**
* Build texture sampling code.
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
@@ -2048,19 +2143,23 @@ lp_build_sample_soa(LLVMBuilderRef builder,
height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
- if (util_format_is_rgba8_variant(bld.format_desc) &&
- static_state->target == PIPE_TEXTURE_2D &&
- static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
- is_simple_wrap_mode(static_state->wrap_s) &&
- is_simple_wrap_mode(static_state->wrap_t)) {
+ if (0) {
+ /* For debug: no-op texture sampling */
+ lp_build_sample_nop(&bld, texel);
+ }
+ else if (util_format_is_rgba8_variant(bld.format_desc) &&
+ static_state->target == PIPE_TEXTURE_2D &&
+ static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
+ static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
+ static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
+ is_simple_wrap_mode(static_state->wrap_s) &&
+ is_simple_wrap_mode(static_state->wrap_t)) {
/* special case */
lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
row_stride_array, data_array, texel);
}
else {
- lp_build_sample_general(&bld, unit, s, t, r,
+ lp_build_sample_general(&bld, unit, s, t, r, lodbias,
width, height, depth,
width_vec, height_vec, depth_vec,
row_stride_array, img_stride_array,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 63b938bfa9..2eac5da6c6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -39,6 +39,7 @@
struct tgsi_token;
+struct tgsi_shader_info;
struct lp_type;
struct lp_build_context;
struct lp_build_mask_context;
@@ -78,7 +79,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[4],
LLVMValueRef (*outputs)[4],
- struct lp_build_sampler_soa *sampler);
+ struct lp_build_sampler_soa *sampler,
+ struct tgsi_shader_info *info);
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 44f8aec1bf..d3c769e28b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -46,6 +46,7 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_scan.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
@@ -125,6 +126,12 @@ struct lp_build_tgsi_soa_context
LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
+ LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS];
+
+ /* we allocate an array of temps if we have indirect
+ * addressing and then the temps above is unused */
+ LLVMValueRef temps_array;
+ boolean has_indirect_addressing;
struct lp_build_mask_context *mask;
struct lp_exec_mask exec_mask;
@@ -169,8 +176,7 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
if (mask->loop_stack_size) {
- /*for loops we need to update the entire mask at
- * runtime */
+ /*for loops we need to update the entire mask at runtime */
LLVMValueRef tmp;
assert(mask->break_mask);
tmp = LLVMBuildAnd(mask->bld->builder,
@@ -232,6 +238,9 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask)
mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
if (mask->cond_stack_size == 0)
mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
+
+ mask->break_stack[mask->break_stack_size++] = mask->break_mask;
+ mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
LLVMBuildBr(mask->bld->builder, mask->loop_block);
@@ -246,7 +255,10 @@ static void lp_exec_break(struct lp_exec_mask *mask)
mask->exec_mask,
"break");
- mask->break_stack[mask->break_stack_size++] = mask->break_mask;
+ /* mask->break_stack_size > 1 implies that we encountered a break
+ * statemant already and if that's the case we want to make sure
+ * our mask is a combination of the previous break and the current
+ * execution mask */
if (mask->break_stack_size > 1) {
mask->break_mask = LLVMBuildAnd(mask->bld->builder,
mask->break_mask,
@@ -263,7 +275,6 @@ static void lp_exec_continue(struct lp_exec_mask *mask)
mask->exec_mask,
"");
- mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
if (mask->cont_stack_size > 1) {
mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
mask->cont_mask,
@@ -299,17 +310,23 @@ static void lp_exec_endloop(struct lp_exec_mask *mask)
LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
- /* pop the break mask */
+ /* pop the cont mask */
if (mask->cont_stack_size) {
mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
}
+ /* pop the break mask */
if (mask->break_stack_size) {
- mask->break_mask = mask->cont_stack[--mask->break_stack_size];
+ mask->break_mask = mask->break_stack[--mask->break_stack_size];
}
lp_exec_mask_update(mask);
}
+/* stores val into an address pointed to by dst.
+ * mask->exec_mask is used to figure out which bits of val
+ * should be stored into the address
+ * (0 means don't store this bit, 1 means do store).
+ */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
LLVMValueRef val,
LLVMValueRef dst)
@@ -347,6 +364,23 @@ emit_ddy(struct lp_build_tgsi_soa_context *bld,
return lp_build_sub(&bld->base, src_top, src_bottom);
}
+static LLVMValueRef
+get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
+ unsigned index,
+ unsigned swizzle,
+ boolean is_indirect,
+ LLVMValueRef addr)
+{
+ if (!bld->has_indirect_addressing) {
+ return bld->temps[index][swizzle];
+ } else {
+ LLVMValueRef lindex =
+ LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
+ if (is_indirect)
+ lindex = lp_build_add(&bld->base, lindex, addr);
+ return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
+ }
+}
/**
* Register fetch.
@@ -361,6 +395,7 @@ emit_fetch(
const struct tgsi_full_src_register *reg = &inst->Src[index];
unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
LLVMValueRef res;
+ LLVMValueRef addr;
switch (swizzle) {
case TGSI_SWIZZLE_X:
@@ -368,11 +403,34 @@ emit_fetch(
case TGSI_SWIZZLE_Z:
case TGSI_SWIZZLE_W:
+ if (reg->Register.Indirect) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
+ unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
+ addr = LLVMBuildLoad(bld->base.builder,
+ bld->addr[reg->Indirect.Index][swizzle],
+ "");
+ /* for indexing we want integers */
+ addr = LLVMBuildFPToSI(bld->base.builder, addr,
+ int_vec_type, "");
+ addr = LLVMBuildExtractElement(bld->base.builder,
+ addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
+ "");
+ addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ }
+
switch (reg->Register.File) {
case TGSI_FILE_CONSTANT: {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
- LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
- LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+ LLVMValueRef scalar, scalar_ptr;
+
+ if (reg->Register.Indirect) {
+ /*lp_build_printf(bld->base.builder,
+ "\taddr = %d\n", addr);*/
+ index = lp_build_add(&bld->base, index, addr);
+ }
+ scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
+ scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+
res = lp_build_broadcast_scalar(&bld->base, scalar);
break;
}
@@ -387,11 +445,16 @@ emit_fetch(
assert(res);
break;
- case TGSI_FILE_TEMPORARY:
- res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
+ case TGSI_FILE_TEMPORARY: {
+ LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+ swizzle,
+ reg->Register.Indirect,
+ addr);
+ res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
if(!res)
return bld->base.undef;
break;
+ }
default:
assert( 0 );
@@ -469,6 +532,7 @@ emit_store(
LLVMValueRef value)
{
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
+ LLVMValueRef addr;
switch( inst->Instruction.Saturate ) {
case TGSI_SAT_NONE:
@@ -488,20 +552,39 @@ emit_store(
assert(0);
}
+ if (reg->Register.Indirect) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
+ unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
+ addr = LLVMBuildLoad(bld->base.builder,
+ bld->addr[reg->Indirect.Index][swizzle],
+ "");
+ /* for indexing we want integers */
+ addr = LLVMBuildFPToSI(bld->base.builder, addr,
+ int_vec_type, "");
+ addr = LLVMBuildExtractElement(bld->base.builder,
+ addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
+ "");
+ addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ }
+
switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
lp_exec_mask_store(&bld->exec_mask, value,
bld->outputs[reg->Register.Index][chan_index]);
break;
- case TGSI_FILE_TEMPORARY:
- lp_exec_mask_store(&bld->exec_mask, value,
- bld->temps[reg->Register.Index][chan_index]);
+ case TGSI_FILE_TEMPORARY: {
+ LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+ chan_index,
+ reg->Register.Indirect,
+ addr);
+ lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
break;
+ }
case TGSI_FILE_ADDRESS:
- /* FIXME */
- assert(0);
+ lp_exec_mask_store(&bld->exec_mask, value,
+ bld->addr[reg->Indirect.Index][chan_index]);
break;
case TGSI_FILE_PREDICATE:
@@ -656,62 +739,42 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
lp_build_mask_update(bld->mask, mask);
}
-
-/**
- * Check if inst src/dest regs use indirect addressing into temporary
- * register file.
- */
-static boolean
-indirect_temp_reference(const struct tgsi_full_instruction *inst)
-{
- uint i;
- for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *reg = &inst->Src[i];
- if (reg->Register.File == TGSI_FILE_TEMPORARY &&
- reg->Register.Indirect)
- return TRUE;
- }
- for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
- const struct tgsi_full_dst_register *reg = &inst->Dst[i];
- if (reg->Register.File == TGSI_FILE_TEMPORARY &&
- reg->Register.Indirect)
- return TRUE;
- }
- return FALSE;
-}
-
static int
emit_declaration(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_declaration *decl)
{
+ LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
+
unsigned first = decl->Range.First;
unsigned last = decl->Range.Last;
unsigned idx, i;
- LLVMBasicBlockRef current_block =
- LLVMGetInsertBlock(bld->base.builder);
- LLVMBasicBlockRef first_block =
- LLVMGetEntryBasicBlock(
- LLVMGetBasicBlockParent(current_block));
- LLVMValueRef first_inst =
- LLVMGetFirstInstruction(first_block);
-
- /* we want alloca's to be the first instruction
- * in the function so we need to rewind the builder
- * to the very beginning */
- LLVMPositionBuilderBefore(bld->base.builder,
- first_inst);
for (idx = first; idx <= last; ++idx) {
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
- for (i = 0; i < NUM_CHANNELS; i++)
- bld->temps[idx][i] = lp_build_alloca(&bld->base);
+ if (bld->has_indirect_addressing) {
+ LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
+ last*4 + 4, 0);
+ bld->temps_array = lp_build_array_alloca(bld->base.builder,
+ vec_type, val, "");
+ } else {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
+ }
break;
case TGSI_FILE_OUTPUT:
for (i = 0; i < NUM_CHANNELS; i++)
- bld->outputs[idx][i] = lp_build_alloca(&bld->base);
+ bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
break;
default:
@@ -720,8 +783,6 @@ emit_declaration(
}
}
- LLVMPositionBuilderAtEnd(bld->base.builder,
- current_block);
return TRUE;
}
@@ -747,10 +808,6 @@ emit_instruction(
LLVMValueRef res;
LLVMValueRef dst0[NUM_CHANNELS];
- /* we can't handle indirect addressing into temp register file yet */
- if (indirect_temp_reference(inst))
- return FALSE;
-
/*
* Stores and write masks are handled in a general fashion after the long
* instruction opcode switch statement.
@@ -770,17 +827,13 @@ emit_instruction(
}
switch (inst->Instruction.Opcode) {
-#if 0
case TGSI_OPCODE_ARL:
- /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_flr(bld, 0, 0);
- emit_f2it( bld, 0 );
+ tmp0 = lp_build_floor(&bld->base, tmp0);
dst0[chan_index] = tmp0;
}
break;
-#endif
case TGSI_OPCODE_MOV:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
@@ -1350,17 +1403,13 @@ emit_instruction(
return FALSE;
break;
-#if 0
case TGSI_OPCODE_ARR:
- /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_rnd( bld, 0, 0 );
- emit_f2it( bld, 0 );
+ tmp0 = lp_build_round(&bld->base, tmp0);
dst0[chan_index] = tmp0;
}
break;
-#endif
case TGSI_OPCODE_BRA:
/* deprecated */
@@ -1540,22 +1589,10 @@ emit_instruction(
lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
break;
- case TGSI_OPCODE_BGNFOR:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
case TGSI_OPCODE_BGNLOOP:
lp_exec_bgnloop(&bld->exec_mask);
break;
- case TGSI_OPCODE_REP:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
case TGSI_OPCODE_ELSE:
lp_exec_mask_cond_invert(&bld->exec_mask);
break;
@@ -1564,22 +1601,10 @@ emit_instruction(
lp_exec_mask_cond_pop(&bld->exec_mask);
break;
- case TGSI_OPCODE_ENDFOR:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
case TGSI_OPCODE_ENDLOOP:
lp_exec_endloop(&bld->exec_mask);
break;
- case TGSI_OPCODE_ENDREP:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
case TGSI_OPCODE_PUSHA:
/* deprecated? */
assert(0);
@@ -1710,7 +1735,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[NUM_CHANNELS],
LLVMValueRef (*outputs)[NUM_CHANNELS],
- struct lp_build_sampler_soa *sampler)
+ struct lp_build_sampler_soa *sampler,
+ struct tgsi_shader_info *info)
{
struct lp_build_tgsi_soa_context bld;
struct tgsi_parse_context parse;
@@ -1726,6 +1752,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
bld.sampler = sampler;
+ bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
+ info->opcode_count[TGSI_OPCODE_ARL] > 0;
lp_exec_mask_init(&bld.exec_mask, &bld.base);
@@ -1746,10 +1774,10 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
case TGSI_TOKEN_TYPE_INSTRUCTION:
{
unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
- const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
- if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
+ const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
+ if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
_debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
- info ? info->mnemonic : "<invalid>");
+ opcode_info->mnemonic);
}
break;
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index 07a4268fc0..c09e8a7a76 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -302,6 +302,7 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
typedef struct {
unsigned count;
unsigned waiters;
+ uint64_t sequence;
pipe_mutex mutex;
pipe_condvar condvar;
} pipe_barrier;
@@ -310,6 +311,7 @@ static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
{
barrier->count = count;
barrier->waiters = 0;
+ barrier->sequence = 0;
pipe_mutex_init(barrier->mutex);
pipe_condvar_init(barrier->condvar);
}
@@ -329,9 +331,14 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
barrier->waiters++;
if (barrier->waiters < barrier->count) {
- pipe_condvar_wait(barrier->condvar, barrier->mutex);
+ uint64_t sequence = barrier->sequence;
+
+ do {
+ pipe_condvar_wait(barrier->condvar, barrier->mutex);
+ } while (sequence == barrier->sequence);
} else {
barrier->waiters = 0;
+ barrier->sequence++;
pipe_condvar_broadcast(barrier->condvar);
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index 080fd4c731..5d9eed9258 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -661,25 +661,6 @@ TGSI Instruction Specification
TBD
-1.9.8 BGNFOR - Begin a For-Loop
-
- dst.x = floor(src.x)
- dst.y = floor(src.y)
- dst.z = floor(src.z)
-
- if (dst.y <= 0)
- pc = [matching ENDFOR] + 1
- endif
-
- Note: The destination must be a loop register.
- The source must be a constant register.
-
-
-1.9.9 REP - Repeat
-
- TBD
-
-
1.9.10 ELSE - Else
TBD
@@ -690,23 +671,6 @@ TGSI Instruction Specification
TBD
-1.9.12 ENDFOR - End a For-Loop
-
- dst.x = dst.x + dst.z
- dst.y = dst.y - 1.0
-
- if (dst.y > 0)
- pc = [matching BGNFOR instruction] + 1
- endif
-
- Note: The destination must be a loop register.
-
-
-1.9.13 ENDREP - End Repeat
-
- TBD
-
-
1.10 GL_NV_vertex_program3
---------------------------
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 57031419f8..8300020018 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -586,7 +586,6 @@ iter_instruction(
/* update indentation */
if (inst->Instruction.Opcode == TGSI_OPCODE_IF ||
inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
- inst->Instruction.Opcode == TGSI_OPCODE_BGNFOR ||
inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
ctx->indentation += indent_spaces;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 11045e4ba2..82eac05dc4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3186,14 +3186,6 @@ exec_instruction(
*pc = -1;
break;
- case TGSI_OPCODE_REP:
- assert (0);
- break;
-
- case TGSI_OPCODE_ENDREP:
- assert (0);
- break;
-
case TGSI_OPCODE_PUSHA:
assert (0);
break;
@@ -3258,29 +3250,6 @@ exec_instruction(
emit_primitive(mach);
break;
- case TGSI_OPCODE_BGNFOR:
- assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- for (chan_index = 0; chan_index < 3; chan_index++) {
- FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
- }
- ++mach->LoopCounterStackTop;
- STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
- /* update LoopMask */
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
- mach->LoopMask &= ~0x1;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
- mach->LoopMask &= ~0x2;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
- mach->LoopMask &= ~0x4;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
- mach->LoopMask &= ~0x8;
- }
- /* TODO: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- /* fall-through (for now) */
case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
@@ -3295,56 +3264,6 @@ exec_instruction(
mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
break;
- case TGSI_OPCODE_ENDFOR:
- assert(mach->LoopCounterStackTop > 0);
- micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
- /* update LoopMask */
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
- mach->LoopMask &= ~0x1;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
- mach->LoopMask &= ~0x2;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
- mach->LoopMask &= ~0x4;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
- mach->LoopMask &= ~0x8;
- }
- micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
- assert(mach->LoopLabelStackTop > 0);
- inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
- STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
- /* Restore ContMask, but don't pop */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
- UPDATE_EXEC_MASK(mach);
- if (mach->ExecMask) {
- /* repeat loop: jump to instruction just past BGNLOOP */
- assert(mach->LoopLabelStackTop > 0);
- *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
- }
- else {
- /* exit loop: pop LoopMask */
- assert(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- /* pop ContMask */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
- assert(mach->LoopLabelStackTop > 0);
- --mach->LoopLabelStackTop;
- assert(mach->LoopCounterStackTop > 0);
- --mach->LoopCounterStackTop;
-
- mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
- }
- UPDATE_EXEC_MASK(mach);
- break;
-
case TGSI_OPCODE_ENDLOOP:
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index de0e09cdba..cfa2f631bd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -106,12 +106,12 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL },
{ 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK },
{ 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF },
- { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR },
- { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP },
+ { 1, 1, 0, 0, 0, 1, "", 75 }, /* removed */
+ { 0, 1, 0, 0, 0, 1, "", 76 }, /* removed */
{ 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE },
{ 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF },
- { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR },
- { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP },
+ { 1, 0, 0, 0, 1, 0, "", 79 }, /* removed */
+ { 0, 0, 0, 0, 1, 0, "", 80 }, /* removed */
{ 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA },
{ 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA },
{ 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index e4af15c156..e472947507 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -111,12 +111,8 @@ OP12(DP2)
OP12_TEX(TXL)
OP00(BRK)
OP01_LBL(IF)
-OP11(BGNFOR)
-OP01(REP)
OP00_LBL(ELSE)
OP00(ENDIF)
-OP10(ENDFOR)
-OP00(ENDREP)
OP01(PUSHA)
OP10(POPA)
OP11(CEIL)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 371f690b29..76b7564cc3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -346,25 +346,6 @@ iter_instruction(
}
}
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_BGNFOR:
- case TGSI_OPCODE_ENDFOR:
- if (inst->Dst[0].Register.File != TGSI_FILE_LOOP ||
- inst->Dst[0].Register.Index != 0) {
- report_error(ctx, "Destination register must be LOOP[0]");
- }
- break;
- }
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_BGNFOR:
- if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT &&
- inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) {
- report_error(ctx, "Source register file must be either CONST or IMM");
- }
- break;
- }
-
ctx->num_instructions++;
return TRUE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index a85cc4659e..1071298b49 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2533,14 +2533,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_BGNFOR:
- return 0;
- break;
-
- case TGSI_OPCODE_REP:
- return 0;
- break;
-
case TGSI_OPCODE_ELSE:
return 0;
break;
@@ -2549,14 +2541,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_ENDFOR:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDREP:
- return 0;
- break;
-
case TGSI_OPCODE_PUSHA:
return 0;
break;
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index 54ed2c1a4b..edd95e0788 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -76,7 +76,8 @@ struct translate {
void (*set_buffer)( struct translate *,
unsigned i,
const void *ptr,
- unsigned stride );
+ unsigned stride,
+ unsigned max_index );
void (PIPE_CDECL *run_elts)( struct translate *,
const unsigned *elts,
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index c3ec9ae3f4..a9272fbb49 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -31,6 +31,7 @@
*/
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "pipe/p_state.h"
#include "translate.h"
@@ -58,6 +59,7 @@ struct translate_generic {
char *input_ptr;
unsigned input_stride;
+ unsigned max_index;
} attrib[PIPE_MAX_ATTRIBS];
@@ -588,19 +590,22 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
const char *src;
+ unsigned index;
char *dst = (vert +
tg->attrib[attr].output_offset);
if (tg->attrib[attr].instance_divisor) {
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride *
- (instance_id / tg->attrib[attr].instance_divisor);
+ index = instance_id / tg->attrib[attr].instance_divisor;
} else {
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * elt;
+ index = elt;
}
+ index = MIN2(index, tg->attrib[attr].max_index);
+
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * index;
+
tg->attrib[attr].fetch( src, data );
if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
@@ -670,7 +675,8 @@ static void PIPE_CDECL generic_run( struct translate *translate,
static void generic_set_buffer( struct translate *translate,
unsigned buf,
const void *ptr,
- unsigned stride )
+ unsigned stride,
+ unsigned max_index )
{
struct translate_generic *tg = translate_generic(translate);
unsigned i;
@@ -680,6 +686,7 @@ static void generic_set_buffer( struct translate *translate,
tg->attrib[i].input_ptr = ((char *)ptr +
tg->attrib[i].input_offset);
tg->attrib[i].input_stride = stride;
+ tg->attrib[i].max_index = max_index;
}
}
}
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index c13e742738..ef3aa674a3 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -61,6 +61,7 @@ typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
struct translate_buffer {
const void *base_ptr;
unsigned stride;
+ unsigned max_index;
};
struct translate_buffer_varient {
@@ -423,6 +424,11 @@ static boolean init_inputs( struct translate_sse *p,
} else {
x86_mov(p->func, tmp_EAX, elt);
}
+
+ /*
+ * TODO: Respect translate_buffer::max_index.
+ */
+
x86_imul(p->func, tmp_EAX, buf_stride);
x86_add(p->func, tmp_EAX, buf_base_ptr);
@@ -666,13 +672,15 @@ static boolean build_vertex_emit( struct translate_sse *p,
static void translate_sse_set_buffer( struct translate *translate,
unsigned buf,
const void *ptr,
- unsigned stride )
+ unsigned stride,
+ unsigned max_index )
{
struct translate_sse *p = (struct translate_sse *)translate;
if (buf < p->nr_buffers) {
p->buffer[buf].base_ptr = (char *)ptr;
p->buffer[buf].stride = stride;
+ p->buffer[buf].max_index = max_index;
}
if (0) debug_printf("%s %d/%d: %p %d\n",
diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c
new file mode 100644
index 0000000000..c7c1e830e0
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_caps.c
@@ -0,0 +1,245 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+#include "util/u_debug.h"
+#include "u_caps.h"
+
+/**
+ * Iterates over a list of caps checks as defined in u_caps.h. Should
+ * all checks pass returns TRUE and out is set to the last element of
+ * the list (TERMINATE). Should any check fail returns FALSE and set
+ * out to the index of the start of the first failing check.
+ */
+boolean
+util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out)
+{
+ int i, tmpi;
+ float tmpf;
+
+ for (i = 0; list[i];) {
+ switch(list[i++]) {
+ case UTIL_CAPS_CHECK_CAP:
+ if (!screen->get_param(screen, list[i++])) {
+ *out = i - 2;
+ return FALSE;
+ }
+ break;
+ case UTIL_CAPS_CHECK_INT:
+ tmpi = screen->get_param(screen, list[i++]);
+ if (tmpi < (int)list[i++]) {
+ *out = i - 3;
+ return FALSE;
+ }
+ break;
+ case UTIL_CAPS_CHECK_FLOAT:
+ tmpf = screen->get_paramf(screen, list[i++]);
+ if (tmpf < (float)list[i++]) {
+ *out = i - 3;
+ return FALSE;
+ }
+ break;
+ case UTIL_CAPS_CHECK_FORMAT:
+ if (!screen->is_format_supported(screen,
+ list[i++],
+ PIPE_TEXTURE_2D,
+ PIPE_BIND_SAMPLER_VIEW,
+ 0)) {
+ *out = i - 2;
+ return FALSE;
+ }
+ break;
+ case UTIL_CAPS_CHECK_UNIMPLEMENTED:
+ *out = i - 1;
+ return FALSE;
+ default:
+ assert(!"Unsupported check");
+ return FALSE;
+ }
+ }
+
+ *out = i;
+ return TRUE;
+}
+
+/**
+ * Iterates over a list of caps checks as defined in u_caps.h.
+ * Returns TRUE if all caps checks pass returns FALSE otherwise.
+ */
+boolean
+util_check_caps(struct pipe_screen *screen, const unsigned *list)
+{
+ int out;
+ return util_check_caps_out(screen, list, &out);
+}
+
+
+/*
+ * Below follows some demo lists.
+ *
+ * None of these lists are exhausting lists of what is
+ * actually needed to support said API and more here for
+ * as example on how to uses the above functions. Especially
+ * for DX10 and DX11 where Gallium is missing features.
+ */
+
+/* DX 9_1 */
+static unsigned caps_dx_9_1[] = {
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 2),
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX 9_2 */
+static unsigned caps_dx_9_2[] = {
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX 9_3 */
+static unsigned caps_dx_9_3[] = {
+ UTIL_CHECK_CAP(SM3),
+ //UTIL_CHECK_CAP(INSTANCING),
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 4),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 13), /* 4096 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX 10 */
+static unsigned caps_dx_10[] = {
+ UTIL_CHECK_CAP(SM3),
+ //UTIL_CHECK_CAP(INSTANCING),
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 8192 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 8192 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX11 */
+static unsigned caps_dx_11[] = {
+ UTIL_CHECK_CAP(SM3),
+ //UTIL_CHECK_CAP(INSTANCING),
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 16384 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 16384 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_FORMAT(B8G8R8A8_UNORM),
+ UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */
+ UTIL_CHECK_TERMINATE
+};
+
+/* OpenGL 2.1 */
+static unsigned caps_opengl_2_1[] = {
+ UTIL_CHECK_CAP(GLSL),
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_CAP(TWO_SIDED_STENCIL),
+ UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 2),
+ UTIL_CHECK_TERMINATE
+};
+
+/* OpenGL 3.0 */
+/* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */
+
+
+/**
+ * Demo function which checks against theoretical caps needed for different APIs.
+ */
+void util_caps_demo_print(struct pipe_screen *screen)
+{
+ struct {
+ char* name;
+ unsigned *list;
+ } list[] = {
+ {"DX 9.1", caps_dx_9_1},
+ {"DX 9.2", caps_dx_9_2},
+ {"DX 9.3", caps_dx_9_3},
+ {"DX 10", caps_dx_10},
+ {"DX 11", caps_dx_11},
+ {"OpenGL 2.1", caps_opengl_2_1},
+/* {"OpenGL 3.0", caps_opengl_3_0},*/
+ {NULL, NULL}
+ };
+ int i, out = 0;
+
+ for (i = 0; list[i].name; i++) {
+ if (util_check_caps_out(screen, list[i].list, &out)) {
+ debug_printf("%s: %s yes\n", __FUNCTION__, list[i].name);
+ continue;
+ }
+ switch (list[i].list[out]) {
+ case UTIL_CAPS_CHECK_CAP:
+ debug_printf("%s: %s no (cap %u not supported)\n", __FUNCTION__,
+ list[i].name,
+ list[i].list[out + 1]);
+ break;
+ case UTIL_CAPS_CHECK_INT:
+ debug_printf("%s: %s no (cap %u less then %u)\n", __FUNCTION__,
+ list[i].name,
+ list[i].list[out + 1],
+ list[i].list[out + 2]);
+ break;
+ case UTIL_CAPS_CHECK_FLOAT:
+ debug_printf("%s: %s no (cap %u less then %f)\n", __FUNCTION__,
+ list[i].name,
+ list[i].list[out + 1],
+ (double)(int)list[i].list[out + 2]);
+ break;
+ case UTIL_CAPS_CHECK_FORMAT:
+ debug_printf("%s: %s no (format %s not supported)\n", __FUNCTION__,
+ list[i].name,
+ util_format_name(list[i].list[out + 1]) + 12);
+ break;
+ case UTIL_CAPS_CHECK_UNIMPLEMENTED:
+ debug_printf("%s: %s no (not implemented in gallium or state tracker)\n",
+ __FUNCTION__, list[i].name);
+ break;
+ default:
+ assert(!"Unsupported check");
+ }
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_caps.h b/src/gallium/auxiliary/util/u_caps.h
new file mode 100644
index 0000000000..b1074f9eb2
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_caps.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_CAPS_H
+#define U_CAPS_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_screen;
+
+enum u_caps_check_enum {
+ UTIL_CAPS_CHECK_TERMINATE = 0,
+ UTIL_CAPS_CHECK_CAP,
+ UTIL_CAPS_CHECK_INT,
+ UTIL_CAPS_CHECK_FLOAT,
+ UTIL_CAPS_CHECK_FORMAT,
+ UTIL_CAPS_CHECK_UNIMPLEMENTED,
+};
+
+#define UTIL_CHECK_CAP(cap) \
+ UTIL_CAPS_CHECK_CAP, PIPE_CAP_##cap
+
+#define UTIL_CHECK_INT(cap, higher) \
+ UTIL_CAPS_CHECK_INT, PIPE_CAP_##cap, (unsigned)(higher)
+
+/* Floats currently lose precision */
+#define UTIL_CHECK_FLOAT(cap, higher) \
+ UTIL_CAPS_CHECK_FLOAT, PIPE_CAP_##cap, (unsigned)(int)(higher)
+
+#define UTIL_CHECK_FORMAT(format) \
+ UTIL_CAPS_CHECK_FORMAT, PIPE_FORMAT_##format
+
+#define UTIL_CHECK_UNIMPLEMENTED \
+ UTIL_CAPS_CHECK_UNIMPLEMENTED
+
+#define UTIL_CHECK_TERMINATE \
+ UTIL_CAPS_CHECK_TERMINATE
+
+boolean util_check_caps(struct pipe_screen *screen, const unsigned *list);
+boolean util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out);
+void util_caps_demo_print(struct pipe_screen *screen);
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index dd044973f9..0de38e791d 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -74,6 +74,24 @@ void debug_print_blob( const char *name,
#endif
+static boolean
+debug_get_option_should_print(void)
+{
+ static boolean first = TRUE;
+ static boolean value = FALSE;
+
+ if (!first)
+ return value;
+
+ /* Oh hey this will call into this function,
+ * but its cool since we set first to false
+ */
+ first = FALSE;
+ value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE);
+ /* XXX should we print this option? Currently it wont */
+ return value;
+}
+
const char *
debug_get_option(const char *name, const char *dfault)
{
@@ -82,8 +100,9 @@ debug_get_option(const char *name, const char *dfault)
result = os_get_option(name);
if(!result)
result = dfault;
-
- debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)");
+
+ if (debug_get_option_should_print())
+ debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)");
return result;
}
@@ -109,7 +128,8 @@ debug_get_bool_option(const char *name, boolean dfault)
else
result = TRUE;
- debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE");
+ if (debug_get_option_should_print())
+ debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE");
return result;
}
@@ -142,8 +162,9 @@ debug_get_num_option(const char *name, long dfault)
}
result *= sign;
}
-
- debug_printf("%s: %s = %li\n", __FUNCTION__, name, result);
+
+ if (debug_get_option_should_print())
+ debug_printf("%s: %s = %li\n", __FUNCTION__, name, result);
return result;
}
@@ -176,11 +197,12 @@ debug_get_flags_option(const char *name,
}
}
- if (str) {
- debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
- }
- else {
- debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+ if (debug_get_option_should_print()) {
+ if (str) {
+ debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
+ } else {
+ debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+ }
}
return result;
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index b6d0b508e3..e8ff2773e6 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -303,6 +303,45 @@ debug_get_flags_option(const char *name,
const struct debug_named_value *flags,
unsigned long dfault);
+#define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \
+static boolean \
+debug_get_option_ ## sufix (void) \
+{ \
+ static boolean first = TRUE; \
+ static boolean value; \
+ if (first) { \
+ first = FALSE; \
+ value = debug_get_bool_option(name, dfault); \
+ } \
+ return value; \
+}
+
+#define DEBUG_GET_ONCE_NUM_OPTION(sufix, name, dfault) \
+static long \
+debug_get_option_ ## sufix (void) \
+{ \
+ static boolean first = TRUE; \
+ static long value; \
+ if (first) { \
+ first = FALSE; \
+ value = debug_get_num_option(name, dfault); \
+ } \
+ return value; \
+}
+
+#define DEBUG_GET_ONCE_FLAGS_OPTION(sufix, name, flags, dfault) \
+static unsigned long \
+debug_get_option_ ## sufix (void) \
+{ \
+ static boolean first = TRUE; \
+ static unsigned long value; \
+ if (first) { \
+ first = FALSE; \
+ value = debug_get_flags_option(name, flags, dfault); \
+ } \
+ return value; \
+}
+
unsigned long
debug_memory_begin(void);
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index c134f13e90..2ce643e90c 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -656,12 +656,12 @@ util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state)
util_dump_struct_begin(stream, "pipe_transfer");
util_dump_member(stream, ptr, state, resource);
-// util_dump_member(stream, uint, state, box);
+ /*util_dump_member(stream, uint, state, box);*/
util_dump_member(stream, uint, state, stride);
util_dump_member(stream, uint, state, slice_stride);
-// util_dump_member(stream, ptr, state, data);
+ /*util_dump_member(stream, ptr, state, data);*/
util_dump_struct_end(stream);
}
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 5e3dc694be..fb6ade5c06 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -192,6 +192,7 @@ struct util_format_description
/**
* Unpack pixel blocks to R8G8B8A8_UNORM.
+ * Note: strides are in bytes.
*
* Only defined for non-depth-stencil formats.
*/
@@ -202,6 +203,7 @@ struct util_format_description
/**
* Pack pixel blocks from R8G8B8A8_UNORM.
+ * Note: strides are in bytes.
*
* Only defined for non-depth-stencil formats.
*/
@@ -212,6 +214,7 @@ struct util_format_description
/**
* Unpack pixel blocks to R32G32B32A32_FLOAT.
+ * Note: strides are in bytes.
*
* Only defined for non-depth-stencil formats.
*/
@@ -222,6 +225,7 @@ struct util_format_description
/**
* Pack pixel blocks from R32G32B32A32_FLOAT.
+ * Note: strides are in bytes.
*
* Only defined for non-depth-stencil formats.
*/
@@ -242,6 +246,7 @@ struct util_format_description
/**
* Unpack pixels to Z32_UNORM.
+ * Note: strides are in bytes.
*
* Only defined for depth formats.
*/
@@ -252,6 +257,7 @@ struct util_format_description
/**
* Pack pixels from Z32_FLOAT.
+ * Note: strides are in bytes.
*
* Only defined for depth formats.
*/
@@ -262,6 +268,7 @@ struct util_format_description
/**
* Unpack pixels to Z32_FLOAT.
+ * Note: strides are in bytes.
*
* Only defined for depth formats.
*/
@@ -272,6 +279,7 @@ struct util_format_description
/**
* Pack pixels from Z32_FLOAT.
+ * Note: strides are in bytes.
*
* Only defined for depth formats.
*/
@@ -282,6 +290,7 @@ struct util_format_description
/**
* Unpack pixels to S8_USCALED.
+ * Note: strides are in bytes.
*
* Only defined for stencil formats.
*/
@@ -292,6 +301,7 @@ struct util_format_description
/**
* Pack pixels from S8_USCALED.
+ * Note: strides are in bytes.
*
* Only defined for stencil formats.
*/
@@ -322,7 +332,7 @@ util_format_name(enum pipe_format format)
assert(desc);
if (!desc) {
- return "???";
+ return "PIPE_FORMAT_???";
}
return desc->name;
diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c
index 79dee2b423..5b279b8fe2 100644
--- a/src/gallium/auxiliary/util/u_format_s3tc.c
+++ b/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -233,108 +233,80 @@ util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned
* Block decompression.
*/
-void
-util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
-{
+static INLINE void
+util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height,
+ util_format_dxtn_fetch_t fetch,
+ unsigned block_size)
+{
+ const unsigned bw = 4, bh = 4, comps = 4;
unsigned x, y, i, j;
- for(y = 0; y < height; y += 4) {
+ for(y = 0; y < height; y += bh) {
const uint8_t *src = src_row;
- for(x = 0; x < width; x += 4) {
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
- util_format_dxt1_rgb_fetch(0, src, i, j, dst);
+ for(x = 0; x < width; x += bw) {
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps;
+ fetch(0, src, i, j, dst);
}
}
- src += 8;
+ src += block_size;
}
src_row += src_stride;
}
}
void
-util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
{
- unsigned x, y, i, j;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
- for(x = 0; x < width; x += 4) {
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
- util_format_dxt1_rgba_fetch(0, src, i, j, dst);
- }
- }
- src += 8;
- }
- src_row += src_stride;
- }
+ util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt1_rgb_fetch, 8);
}
void
-util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
{
- unsigned x, y, i, j;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
- for(x = 0; x < width; x += 4) {
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
- util_format_dxt3_rgba_fetch(0, src, i, j, dst);
- }
- }
- src += 16;
- }
- src_row += src_stride;
- }
+ util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt1_rgba_fetch, 8);
}
void
-util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
{
- unsigned x, y, i, j;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
- for(x = 0; x < width; x += 4) {
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
- util_format_dxt5_rgba_fetch(0, src, i, j, dst);
- }
- }
- src += 16;
- }
- src_row += src_stride;
- }
+ util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt3_rgba_fetch, 16);
}
void
-util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
{
- unsigned x, y, i, j;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
- for(x = 0; x < width; x += 4) {
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
- uint8_t tmp[4];
- util_format_dxt1_rgb_fetch(0, src, i, j, tmp);
- dst[0] = ubyte_to_float(tmp[0]);
- dst[1] = ubyte_to_float(tmp[1]);
- dst[2] = ubyte_to_float(tmp[2]);
- dst[3] = 1.0;
- }
- }
- src += 8;
- }
- src_row += src_stride;
- }
+ util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt5_rgba_fetch, 16);
}
-void
-util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+static INLINE void
+util_format_dxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height,
+ util_format_dxtn_fetch_t fetch,
+ unsigned block_size)
{
unsigned x, y, i, j;
for(y = 0; y < height; y += 4) {
@@ -344,65 +316,61 @@ util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, con
for(i = 0; i < 4; ++i) {
float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
uint8_t tmp[4];
- util_format_dxt1_rgba_fetch(0, src, i, j, tmp);
+ fetch(0, src, i, j, tmp);
dst[0] = ubyte_to_float(tmp[0]);
dst[1] = ubyte_to_float(tmp[1]);
dst[2] = ubyte_to_float(tmp[2]);
dst[3] = ubyte_to_float(tmp[3]);
}
}
- src += 8;
+ src += block_size;
}
src_row += src_stride;
}
}
void
-util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
{
- unsigned x, y, i, j;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
- for(x = 0; x < width; x += 4) {
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
- uint8_t tmp[4];
- util_format_dxt3_rgba_fetch(0, src, i, j, tmp);
- dst[0] = ubyte_to_float(tmp[0]);
- dst[1] = ubyte_to_float(tmp[1]);
- dst[2] = ubyte_to_float(tmp[2]);
- dst[3] = ubyte_to_float(tmp[3]);
- }
- }
- src += 16;
- }
- src_row += src_stride;
- }
+ util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt1_rgb_fetch, 8);
}
void
-util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
{
- unsigned x, y, i, j;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
- for(x = 0; x < width; x += 4) {
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
- uint8_t tmp[4];
- util_format_dxt5_rgba_fetch(0, src, i, j, tmp);
- dst[0] = ubyte_to_float(tmp[0]);
- dst[1] = ubyte_to_float(tmp[1]);
- dst[2] = ubyte_to_float(tmp[2]);
- dst[3] = ubyte_to_float(tmp[3]);
- }
- }
- src += 16;
- }
- src_row += src_stride;
- }
+ util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt1_rgba_fetch, 8);
+}
+
+void
+util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt3_rgba_fetch, 16);
+}
+
+void
+util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height,
+ util_format_dxt5_rgba_fetch, 16);
}
@@ -411,201 +379,198 @@ util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, con
*/
void
-util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 8;
unsigned x, y, i, j, k;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
+ for(y = 0; y < height; y += bh) {
uint8_t *dst = dst_row;
- for(x = 0; x < width; x += 4) {
- uint8_t tmp[4][4][3];
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][3]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
for(k = 0; k < 3; ++k) {
- tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*4 + k];
}
}
}
- util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride);
- src += 4*4;
- dst += 8;
+ util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0);
+ dst += bytes_per_block;
}
- src_row += src_stride;
- dst_row += 4*dst_stride/sizeof(*dst_row);
+ dst_row += dst_stride / sizeof(*dst_row);
}
}
void
-util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
+ const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 8;
unsigned x, y, i, j, k;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
+ for(y = 0; y < height; y += bh) {
uint8_t *dst = dst_row;
- for(x = 0; x < width; x += 4) {
- uint8_t tmp[4][4][4];
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < comps; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride);
- src += 4*4;
- dst += 8;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0);
+ dst += bytes_per_block;
}
- src_row += src_stride;
- dst_row += 4*dst_stride/sizeof(*dst_row);
+ dst_row += dst_stride / sizeof(*dst_row);
}
}
void
-util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
+ const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16;
unsigned x, y, i, j, k;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
+ for(y = 0; y < height; y += bh) {
uint8_t *dst = dst_row;
- for(x = 0; x < width; x += 4) {
- uint8_t tmp[4][4][4];
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < comps; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride);
- src += 4*4;
- dst += 16;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0);
+ dst += bytes_per_block;
}
- src_row += src_stride;
- dst_row += 4*dst_stride/sizeof(*dst_row);
+ dst_row += dst_stride / sizeof(*dst_row);
}
}
void
-util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
+ const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16;
unsigned x, y, i, j, k;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
+
+ for(y = 0; y < height; y += bh) {
uint8_t *dst = dst_row;
- for(x = 0; x < width; x += 4) {
- uint8_t tmp[4][4][4];
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < comps; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride);
- src += 4*4;
- dst += 16;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0);
+ dst += bytes_per_block;
}
- src_row += src_stride;
- dst_row += 4*dst_stride/sizeof(*dst_row);
+ dst_row += dst_stride / sizeof(*dst_row);
}
}
void
-util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
unsigned x, y, i, j, k;
for(y = 0; y < height; y += 4) {
- const float *src = src_row;
uint8_t *dst = dst_row;
for(x = 0; x < width; x += 4) {
uint8_t tmp[4][4][3];
for(j = 0; j < 4; ++j) {
for(i = 0; i < 4; ++i) {
for(k = 0; k < 3; ++k) {
- tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
}
}
}
- util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride);
- src += 4*4;
+ util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0);
dst += 8;
}
- src_row += src_stride;
dst_row += 4*dst_stride/sizeof(*dst_row);
}
}
void
-util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
unsigned x, y, i, j, k;
for(y = 0; y < height; y += 4) {
- const float *src = src_row;
uint8_t *dst = dst_row;
for(x = 0; x < width; x += 4) {
uint8_t tmp[4][4][4];
for(j = 0; j < 4; ++j) {
for(i = 0; i < 4; ++i) {
for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride);
- src += 4*4;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0);
dst += 8;
}
- src_row += src_stride;
dst_row += 4*dst_stride/sizeof(*dst_row);
}
}
void
-util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
unsigned x, y, i, j, k;
for(y = 0; y < height; y += 4) {
- const float *src = src_row;
uint8_t *dst = dst_row;
for(x = 0; x < width; x += 4) {
uint8_t tmp[4][4][4];
for(j = 0; j < 4; ++j) {
for(i = 0; i < 4; ++i) {
for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride);
- src += 4*4;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0);
dst += 16;
}
- src_row += src_stride;
dst_row += 4*dst_stride/sizeof(*dst_row);
}
}
void
-util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
unsigned x, y, i, j, k;
for(y = 0; y < height; y += 4) {
- const float *src = src_row;
uint8_t *dst = dst_row;
for(x = 0; x < width; x += 4) {
uint8_t tmp[4][4][4];
for(j = 0; j < 4; ++j) {
for(i = 0; i < 4; ++i) {
for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride);
- src += 4*4;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0);
dst += 16;
}
- src_row += src_stride;
dst_row += 4*dst_stride/sizeof(*dst_row);
}
}
diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c
index 0be4609a20..668da8c5c2 100644
--- a/src/gallium/auxiliary/util/u_surfaces.c
+++ b/src/gallium/auxiliary/util/u_surfaces.c
@@ -9,13 +9,13 @@
static unsigned
hash(void *key)
{
- return (unsigned)key;
+ return (unsigned)(uintptr_t)key;
}
static int
compare(void *key1, void *key2)
{
- return (unsigned)key1 - (unsigned)key2;
+ return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2;
}
struct pipe_surface *
@@ -67,7 +67,7 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps)
struct pipe_resource *pt = ps->texture;
if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
{ /* or 2D array */
- void* key = (void*)(((ps->zslice + ps->face) << 8) | ps->level);
+ void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level);
util_hash_table_remove(us->u.table, key);
}
else
@@ -105,7 +105,7 @@ util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (
if(ps)
destroy_surface(ps);
}
- free(us->u.array);
+ FREE(us->u.array);
us->u.array = NULL;
}
}
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index fe327c302b..f7aa1403d0 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -544,7 +544,7 @@ pipe_put_tile_z(struct pipe_context *pipe,
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
{
uint *pDest = (uint *) (map + y * pt->stride + x*4);
- //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);
+ /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 32-bit Z to 24-bit Z, preserve stencil */
@@ -571,7 +571,7 @@ pipe_put_tile_z(struct pipe_context *pipe,
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
{
uint *pDest = (uint *) (map + y * pt->stride + x*4);
- //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);
+ /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 32-bit Z to 24-bit Z, preserve stencil */