summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/Makefile1
-rw-r--r--src/gallium/auxiliary/SConscript1
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c155
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h12
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c38
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h10
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c25
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c5
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c18
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.c75
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.h10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c7
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c12
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.h3
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c5
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c47
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt36
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c81
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c19
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c16
-rw-r--r--src/gallium/auxiliary/util/u_caps.c244
-rw-r--r--src/gallium/auxiliary/util/u_caps.h67
-rw-r--r--src/gallium/auxiliary/util/u_dump_state.c4
-rw-r--r--src/gallium/auxiliary/util/u_format.h2
-rw-r--r--src/gallium/auxiliary/util/u_format_s3tc.c154
-rw-r--r--src/gallium/auxiliary/util/u_tile.c4
35 files changed, 725 insertions, 362 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index f8e65cf6c6..2daed382cf 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -101,6 +101,7 @@ C_SOURCES = \
util/u_blit.c \
util/u_blitter.c \
util/u_cache.c \
+ util/u_caps.c \
util/u_cpu_detect.c \
util/u_dl.c \
util/u_draw_quad.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index db3a1e7311..a0673df8a8 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -144,6 +144,7 @@ source = [
'util/u_blit.c',
'util/u_blitter.c',
'util/u_cache.c',
+ 'util/u_caps.c',
'util/u_cpu_detect.c',
'util/u_debug.c',
'util/u_debug_memory.c',
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 030683f60f..55e0ded1b3 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -314,10 +314,13 @@ void cso_release_all( struct cso_context *ctx )
}
+/**
+ * Free the CSO context. NOTE: the state tracker should have previously called
+ * cso_release_all().
+ */
void cso_destroy_context( struct cso_context *ctx )
{
if (ctx) {
- /*cso_release_all( ctx );*/
FREE( ctx );
}
}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 703b46b518..3b2df054c3 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -24,6 +24,8 @@
/* generates the draw jit function */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
static void
init_globals(struct draw_llvm *llvm)
@@ -218,6 +220,7 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs)
llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
draw_llvm_generate(llvm, variant);
+ draw_llvm_generate_elts(llvm, variant);
return variant;
}
@@ -696,6 +699,158 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
lp_disassemble(variant->jit_func);
}
+
+/**
+ * Build the JIT vertex function used for indexed (element list) fetches.
+ *
+ * Creates the LLVM function "draw_llvm_shader_elts" taking
+ * (context, io, vbuffers, fetch_elts, fetch_count, stride, vb), emits its
+ * body, runs the function pass manager on it and stores the results in
+ * variant->function_elts (LLVM value) and variant->jit_func_elts (pointer
+ * obtained from the execution engine).
+ *
+ * The emitted body is a single loop.  Each iteration reads NUM_CHANNELS
+ * entries from fetch_elts (positions clamped to fetch_count - 1), fetches
+ * every vertex element for those indices, converts the attributes to SoA,
+ * runs the vertex shader and writes the outputs back through io in AoS
+ * form.
+ */
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
+{
+ LLVMTypeRef arg_types[7];
+ LLVMTypeRef func_type;
+ LLVMValueRef context_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
+ LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ struct draw_context *draw = llvm->draw;
+ unsigned i, j;
+ struct lp_build_context bld;
+ struct lp_build_context bld_int;
+ struct lp_build_loop_state lp_loop;
+ struct lp_type vs_type = lp_type_float_vec(32);
+ const int max_vertices = 4;
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+ LLVMValueRef fetch_max;
+
+ arg_types[0] = llvm->context_ptr_type; /* context */
+ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
+ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
+ arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
+ arg_types[4] = LLVMInt32Type(); /* fetch_count */
+ arg_types[5] = LLVMInt32Type(); /* stride */
+ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+
+ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+ variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
+ LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
+ /* Tag every pointer-typed argument with the noalias attribute. */
+ for(i = 0; i < Elements(arg_types); ++i)
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
+
+ context_ptr = LLVMGetParam(variant->function_elts, 0);
+ io_ptr = LLVMGetParam(variant->function_elts, 1);
+ vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
+ fetch_elts = LLVMGetParam(variant->function_elts, 3);
+ fetch_count = LLVMGetParam(variant->function_elts, 4);
+ /* NOTE(review): stride is only retrieved and named here; nothing else in
+ * this function reads it. */
+ stride = LLVMGetParam(variant->function_elts, 5);
+ vb_ptr = LLVMGetParam(variant->function_elts, 6);
+
+ lp_build_name(context_ptr, "context");
+ lp_build_name(io_ptr, "io");
+ lp_build_name(vbuffers_ptr, "vbuffers");
+ lp_build_name(fetch_elts, "fetch_elts");
+ lp_build_name(fetch_count, "fetch_count");
+ lp_build_name(stride, "stride");
+ lp_build_name(vb_ptr, "vb");
+
+ /*
+ * Function body
+ */
+
+ block = LLVMAppendBasicBlock(variant->function_elts, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ lp_build_context_init(&bld, builder, vs_type);
+ lp_build_context_init(&bld_int, builder, lp_type_int(32));
+
+ step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+
+ /* fetch_count - 1: the last valid position in fetch_elts, used below to
+ * clamp positions on the final, possibly partial, batch. */
+ fetch_max = LLVMBuildSub(builder, fetch_count,
+ LLVMConstInt(LLVMInt32Type(), 1, 0),
+ "fetch_max");
+
+ lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
+ {
+ LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
+ LLVMValueRef io;
+ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
+
+ io_itr = lp_loop.counter;
+ io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
+#if DEBUG_STORE
+ lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
+ io_itr, io, lp_loop.counter);
+#endif
+ /* One pass per vertex of the current batch.
+ * NOTE(review): the bound is NUM_CHANNELS while the batch size passed to
+ * convert_to_aos() is max_vertices (4); presumably the two are equal --
+ * confirm. */
+ for (i = 0; i < NUM_CHANNELS; ++i) {
+ LLVMValueRef true_index = LLVMBuildAdd(
+ builder,
+ lp_loop.counter,
+ LLVMConstInt(LLVMInt32Type(), i, 0), "");
+ LLVMValueRef fetch_ptr;
+
+ /* make sure we're not out of bounds which can happen
+ * if fetch_count % 4 != 0, because on the last iteration
+ * a few of the 4 vertex fetches will be out of bounds */
+ true_index = lp_build_min(&bld_int, true_index, fetch_max);
+
+ /* Turn the position in the element list into the element index stored
+ * at that position. */
+ fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+ &true_index, 1, "");
+ true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
+ for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
+ struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
+ LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
+ velem->vertex_buffer_index,
+ 0);
+ LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
+ &vb_index, 1, "");
+ generate_fetch(builder, vbuffers_ptr,
+ &aos_attribs[j][i], velem, vb, true_index);
+ }
+ }
+ convert_to_soa(builder, aos_attribs, inputs,
+ draw->pt.nr_vertex_elements);
+
+ /* Despite its name, ptr_aos refers to the SoA inputs filled in by
+ * convert_to_soa() just above. */
+ ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
+ generate_vs(llvm,
+ builder,
+ outputs,
+ ptr_aos,
+ context_ptr);
+
+ convert_to_aos(builder, io, outputs,
+ draw->vs.vertex_shader->info.num_outputs,
+ max_vertices);
+ }
+ /* Close the loop; presumably advances the counter by step and exits once
+ * it is >= fetch_count (unsigned compare) -- see lp_build_loop_end_cond(). */
+ lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
+
+ LLVMBuildRetVoid(builder);
+
+ LLVMDisposeBuilder(builder);
+
+ /*
+ * Translate the LLVM IR into machine code.
+ */
+#ifdef DEBUG
+ if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
+ LLVMDumpValue(variant->function_elts);
+ assert(0);
+ }
+#endif
+
+ LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
+
+ /* Flip to 1 to dump the generated IR. */
+ if (0) {
+ LLVMDumpValue(variant->function_elts);
+ debug_printf("\n");
+ }
+ variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal(
+ llvm->draw->engine, variant->function_elts);
+
+ /* Flip to 1 to disassemble the generated machine code. */
+ if (0)
+ lp_disassemble(variant->jit_func_elts);
+}
+
void
draw_llvm_make_variant_key(struct draw_llvm *llvm,
struct draw_llvm_variant_key *key)
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 20f4f3da5f..58fee7f9d6 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -94,6 +94,16 @@ typedef void
unsigned stride,
struct pipe_vertex_buffer *vertex_buffers);
+
+/**
+ * Signature of the JIT-compiled vertex function for indexed fetches
+ * (stored in draw_llvm_variant::jit_func_elts).
+ *
+ * context        - JIT context passed through to the generated code
+ * io             - vertex_header storage the generated code writes into
+ * vbuffers       - vertex buffer base pointers, up to PIPE_MAX_ATTRIBS
+ * fetch_elts     - list of element indices to fetch
+ *                  (presumably fetch_count entries -- confirm with callers)
+ * fetch_count    - number of elements to process
+ * stride         - NOTE(review): meaning not visible from this declaration;
+ *                  confirm against the caller
+ * vertex_buffers - pipe_vertex_buffer descriptions for the bound buffers
+ */
+typedef void
+(*draw_jit_vert_func_elts)(struct draw_jit_context *context,
+ struct vertex_header *io,
+ const char *vbuffers[PIPE_MAX_ATTRIBS],
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ unsigned stride,
+ struct pipe_vertex_buffer *vertex_buffers);
+
struct draw_llvm {
struct draw_context *draw;
@@ -122,7 +132,9 @@ struct draw_llvm_variant
{
struct draw_llvm_variant_key key;
LLVMValueRef function;
+ LLVMValueRef function_elts;
draw_jit_vert_func jit_func;
+ draw_jit_vert_func_elts jit_func_elts;
struct draw_llvm_variant *next;
};
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 1d9395e06c..b853f3a89f 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -38,10 +38,10 @@
#include "util/u_prim.h"
-DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE);
-DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
#ifdef HAVE_LLVM
-DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE);
+DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
#endif
static unsigned trim( unsigned count, unsigned first, unsigned incr )
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index f71271bd91..d2a492f2b4 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -167,8 +167,6 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
- struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align( fetch_count, 4 );
@@ -182,35 +180,13 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
return;
}
- /* Fetch into our vertex buffer
- */
- draw_pt_fetch_run( fpme->fetch,
- fetch_elts,
- fetch_count,
- (char *)pipeline_verts );
-
- /* Run the shader, note that this overwrites the data[] parts of
- * the pipeline verts. If there is no shader, eg if
- * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
- * already in the correct place.*/
- if (opt & PT_SHADE)
- {
- vshader->run_linear(vshader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.vs_constants,
- fetch_count,
- fpme->vertex_size,
- fpme->vertex_size);
- if (gshader)
- draw_geometry_shader_run(gshader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.gs_constants,
- fetch_count,
- fpme->vertex_size,
- fpme->vertex_size);
- }
+ fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
+ pipeline_verts,
+ (const char **)draw->pt.user.vbuffer,
+ fetch_elts,
+ fetch_count,
+ fpme->vertex_size,
+ draw->pt.vertex_buffer );
if (draw_pt_post_vs_run( fpme->post_vs,
pipeline_verts,
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 0c05957350..c2832eefa2 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -46,7 +46,7 @@
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
-DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE)
void
draw_vs_set_constants(struct draw_context *draw,
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index efead42335..6c7e94db43 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -169,8 +169,9 @@ draw_create_vs_ppc(struct draw_context *draw,
struct draw_vs_varient_key;
struct draw_vertex_shader;
-struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key );
+struct draw_vs_varient *
+draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
@@ -188,8 +189,9 @@ struct translate *draw_vs_get_fetch( struct draw_context *draw,
struct translate *draw_vs_get_emit( struct draw_context *draw,
struct translate_key *key );
-struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key );
+struct draw_vs_varient *
+draw_vs_create_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index e7121f3654..19f49e34c8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -2089,13 +2089,21 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
}
+/**
+ * Cast wrapper: view a generic draw_vs_varient pointer as the AOS/SSE
+ * varient type.  This is a plain pointer cast; no checking is done.
+ */
+static INLINE struct draw_vs_varient_aos_sse *
+draw_vs_varient_aos_sse(struct draw_vs_varient *varient)
+{
+   struct draw_vs_varient_aos_sse *vaos =
+      (struct draw_vs_varient_aos_sse *) varient;
+
+   return vaos;
+}
+
static void vaos_set_buffer( struct draw_vs_varient *varient,
unsigned buf,
const void *ptr,
- unsigned stride )
+ unsigned stride,
+ unsigned max_stride)
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
if (buf < vaos->nr_vb) {
vaos->buffer[buf].base_ptr = (char *)ptr;
@@ -2112,7 +2120,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
unsigned count,
void *output_buffer )
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
struct aos_machine *machine = vaos->draw->vs.aos_machine;
unsigned i;
@@ -2136,7 +2144,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
unsigned count,
void *output_buffer )
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
struct aos_machine *machine = vaos->draw->vs.aos_machine;
unsigned i;
@@ -2165,7 +2173,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
static void vaos_destroy( struct draw_vs_varient *varient )
{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient);
FREE( vaos->buffer );
@@ -2241,13 +2249,14 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
}
-struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+struct draw_vs_varient *
+draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
{
struct draw_vs_varient *varient = varient_aos_sse( vs, key );
if (varient == NULL) {
- varient = draw_vs_varient_generic( vs, key );
+ varient = draw_vs_create_varient_generic( vs, key );
}
return varient;
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 7deca2b69d..bc34d390da 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -203,7 +203,7 @@ draw_create_vs_exec(struct draw_context *draw,
vs->base.prepare = vs_exec_prepare;
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->machine = draw->vs.machine;
return &vs->base;
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index d869eecec5..5df84916c5 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
*/
shader->func(inputs_soa, outputs_soa, temps_soa,
(float (*)[4]) shader->base.immediates,
- (const float (*)[4])constants[0],
+ (float (*)[4])constants[0],
ppc_builtin_constants);
/* convert (up to) four output verts from SoA back to AoS format */
@@ -190,7 +190,7 @@ draw_create_vs_ppc(struct draw_context *draw,
vs->base.create_varient = draw_vs_varient_aos_ppc;
else
#endif
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->base.prepare = vs_ppc_prepare;
vs->base.run_linear = vs_ppc_run_linear;
vs->base.delete = vs_ppc_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 54e6423388..14c95082a9 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -165,9 +165,9 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.draw = draw;
if (1)
- vs->base.create_varient = draw_vs_varient_aos_sse;
+ vs->base.create_varient = draw_vs_create_varient_aos_sse;
else
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 0abd827b61..6eb26927f2 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -263,8 +263,9 @@ static void vsvg_destroy( struct draw_vs_varient *varient )
}
-struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
- const struct draw_vs_varient_key *key )
+struct draw_vs_varient *
+draw_vs_create_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key )
{
unsigned i;
struct translate_key fetch, emit;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 5f9d4c62d1..20ae958714 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1210,6 +1210,14 @@ LLVMValueRef
lp_build_cos(struct lp_build_context *bld,
LLVMValueRef a)
{
+#ifdef PIPE_OS_WINDOWS
+ /*
+ * FIXME: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf()
+ * which is neither efficient nor does the CRT linkage work on Windows
+ * causing segmentation fault. So simply disable the code for now.
+ */
+ return bld->one;
+#else
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -1220,6 +1228,7 @@ lp_build_cos(struct lp_build_context *bld,
util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width);
return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
+#endif
}
@@ -1230,6 +1239,14 @@ LLVMValueRef
lp_build_sin(struct lp_build_context *bld,
LLVMValueRef a)
{
+#ifdef PIPE_OS_WINDOWS
+ /*
+ * FIXME: X86 backend translates llvm.sin.v4f32 to 4 calls to CRT's sinf()
+ * which is neither efficient nor does the CRT linkage work on Windows
+ * causing segmentation fault. So simply disable the code for now.
+ */
+ return bld->zero;
+#else
const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -1240,6 +1257,7 @@ lp_build_sin(struct lp_build_context *bld,
util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width);
return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
+#endif
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index e60ab4f6ba..8f15b1d287 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -792,3 +792,78 @@ lp_build_endif(struct lp_build_if_state *ctx)
/* Resume building code at end of the ifthen->merge_block */
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
}
+
+
+/**
+ * Allocate a scalar (or vector) variable.
+ *
+ * Although not strictly part of control flow, control flow has deep impact in
+ * how variables should be allocated.
+ *
+ * The mem2reg optimization pass is the recommended way of dealing with mutable
+ * variables, and SSA. It looks for allocas and if it can handle them, it
+ * promotes them, but only looks for alloca instructions in the entry block of
+ * the function. Being in the entry block guarantees that the alloca is only
+ * executed once, which makes analysis simpler.
+ *
+ * The alloca is therefore always emitted at the very start of the entry
+ * block of the function the given builder is currently positioned in; the
+ * insertion point of the builder passed in is left untouched.
+ *
+ * \param builder  builder positioned somewhere inside the target function
+ * \param type     LLVM type of the variable to allocate
+ * \param name     name given to the resulting alloca value
+ * \return the alloca instruction (a pointer to the new variable)
+ *
+ * See also:
+ * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory
+ */
+LLVMValueRef
+lp_build_alloca(LLVMBuilderRef builder,
+                LLVMTypeRef type,
+                const char *name)
+{
+   LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
+   LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+   LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
+   LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
+   LLVMBuilderRef first_builder = LLVMCreateBuilder();
+   LLVMValueRef res;
+
+   /*
+    * LLVMGetFirstInstruction() returns NULL when the entry block is still
+    * empty, and LLVMPositionBuilderBefore() must not be handed a NULL
+    * instruction.  In that case appending to the (empty) entry block puts
+    * the alloca first all the same.
+    */
+   if (first_instr) {
+      LLVMPositionBuilderBefore(first_builder, first_instr);
+   } else {
+      LLVMPositionBuilderAtEnd(first_builder, first_block);
+   }
+
+   res = LLVMBuildAlloca(first_builder, type, name);
+
+   LLVMDisposeBuilder(first_builder);
+
+   return res;
+}
+
+
+/**
+ * Allocate an array of scalars/vectors.
+ *
+ * mem2reg pass is not capable of promoting structs or arrays to registers, but
+ * we still put it in the first block anyway as failure to put allocas in the
+ * first block may prevent the X86 backend from successfully aligning the stack
+ * as required.
+ *
+ * Also the scalarrepl pass is supposedly more powerful and can promote
+ * arrays in many cases.
+ *
+ * The alloca is emitted at the very start of the entry block of the function
+ * the given builder is currently positioned in; the insertion point of the
+ * builder passed in is left untouched.
+ *
+ * \param builder  builder positioned somewhere inside the target function
+ * \param type     LLVM type of one array element
+ * \param count    number of elements to allocate
+ * \param name     name given to the resulting alloca value
+ * \return the array alloca instruction (a pointer to the first element)
+ *
+ * See also:
+ * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory
+ */
+LLVMValueRef
+lp_build_array_alloca(LLVMBuilderRef builder,
+                      LLVMTypeRef type,
+                      LLVMValueRef count,
+                      const char *name)
+{
+   LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
+   LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+   LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
+   LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
+   LLVMBuilderRef first_builder = LLVMCreateBuilder();
+   LLVMValueRef res;
+
+   /*
+    * LLVMGetFirstInstruction() returns NULL when the entry block is still
+    * empty, and LLVMPositionBuilderBefore() must not be handed a NULL
+    * instruction.  In that case appending to the (empty) entry block puts
+    * the alloca first all the same.
+    */
+   if (first_instr) {
+      LLVMPositionBuilderBefore(first_builder, first_instr);
+   } else {
+      LLVMPositionBuilderAtEnd(first_builder, first_block);
+   }
+
+   res = LLVMBuildArrayAlloca(first_builder, type, count, name);
+
+   LLVMDisposeBuilder(first_builder);
+
+   return res;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
index 745838570c..fffb493a93 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -156,5 +156,15 @@ lp_build_endif(struct lp_build_if_state *ctx);
LLVMBasicBlockRef
lp_build_insert_new_block(LLVMBuilderRef builder, const char *name);
+LLVMValueRef
+lp_build_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ const char *name);
+
+LLVMValueRef
+lp_build_array_alloca(LLVMBuilderRef builder,
+ LLVMTypeRef type,
+ LLVMValueRef count,
+ const char *name);
#endif /* !LP_BLD_FLOW_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index a5a019fa92..6257e9a404 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -40,6 +40,7 @@
#include "lp_bld_init.h"
#include "lp_bld_type.h"
+#include "lp_bld_flow.h"
#include "lp_bld_format.h"
@@ -370,11 +371,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
}
- /*
- * XXX: this should better go to the first block in the function
- */
-
- tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+ tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
/*
* Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index a3b6970116..d13fa1a5d0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -472,18 +472,6 @@ lp_build_select_aos(struct lp_build_context *bld,
}
}
-LLVMValueRef
-lp_build_alloca(struct lp_build_context *bld)
-{
- const struct lp_type type = bld->type;
-
- if (type.length > 1) { /*vector*/
- return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), "");
- } else { /*scalar*/
- return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), "");
- }
-}
-
/** Return (a & ~b) */
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index 00a8c75019..29f9fc3b20 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -76,9 +76,6 @@ lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef b,
const boolean cond[4]);
-LLVMValueRef
-lp_build_alloca(struct lp_build_context *bld);
-
LLVMValueRef
lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 54ef921678..74dc9e1d81 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1830,6 +1830,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef unswizzled[4];
LLVMValueRef stride;
+ assert(bld->static_state->target == PIPE_TEXTURE_2D);
+ assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
+ assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
+ assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
+
lp_build_context_init(&i32, builder, lp_type_int_vec(32));
lp_build_context_init(&h16, builder, lp_type_ufixed(16));
lp_build_context_init(&u8n, builder, lp_type_unorm(8));
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index fac90c6006..d3c769e28b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -744,22 +744,11 @@ emit_declaration(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_declaration *decl)
{
+ LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
+
unsigned first = decl->Range.First;
unsigned last = decl->Range.Last;
unsigned idx, i;
- LLVMBasicBlockRef current_block =
- LLVMGetInsertBlock(bld->base.builder);
- LLVMBasicBlockRef first_block =
- LLVMGetEntryBasicBlock(
- LLVMGetBasicBlockParent(current_block));
- LLVMValueRef first_inst =
- LLVMGetFirstInstruction(first_block);
-
- /* we want alloca's to be the first instruction
- * in the function so we need to rewind the builder
- * to the very beginning */
- LLVMPositionBuilderBefore(bld->base.builder,
- first_inst);
for (idx = first; idx <= last; ++idx) {
switch (decl->Declaration.File) {
@@ -767,23 +756,25 @@ emit_declaration(
if (bld->has_indirect_addressing) {
LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
last*4 + 4, 0);
- bld->temps_array = LLVMBuildArrayAlloca(bld->base.builder,
- lp_build_vec_type(bld->base.type),
- val, "");
+ bld->temps_array = lp_build_array_alloca(bld->base.builder,
+ vec_type, val, "");
} else {
for (i = 0; i < NUM_CHANNELS; i++)
- bld->temps[idx][i] = lp_build_alloca(&bld->base);
+ bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
}
break;
case TGSI_FILE_OUTPUT:
for (i = 0; i < NUM_CHANNELS; i++)
- bld->outputs[idx][i] = lp_build_alloca(&bld->base);
+ bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
break;
case TGSI_FILE_ADDRESS:
for (i = 0; i < NUM_CHANNELS; i++)
- bld->addr[idx][i] = lp_build_alloca(&bld->base);
+ bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
break;
default:
@@ -792,8 +783,6 @@ emit_declaration(
}
}
- LLVMPositionBuilderAtEnd(bld->base.builder,
- current_block);
return TRUE;
}
@@ -1600,18 +1589,10 @@ emit_instruction(
lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
break;
- case TGSI_OPCODE_BGNFOR:
- /* fall through */
case TGSI_OPCODE_BGNLOOP:
lp_exec_bgnloop(&bld->exec_mask);
break;
- case TGSI_OPCODE_REP:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
case TGSI_OPCODE_ELSE:
lp_exec_mask_cond_invert(&bld->exec_mask);
break;
@@ -1620,18 +1601,10 @@ emit_instruction(
lp_exec_mask_cond_pop(&bld->exec_mask);
break;
- case TGSI_OPCODE_ENDFOR:
- /* fall-through */
case TGSI_OPCODE_ENDLOOP:
lp_exec_endloop(&bld->exec_mask);
break;
- case TGSI_OPCODE_ENDREP:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
case TGSI_OPCODE_PUSHA:
/* deprecated? */
assert(0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index 080fd4c731..5d9eed9258 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -661,25 +661,6 @@ TGSI Instruction Specification
TBD
-1.9.8 BGNFOR - Begin a For-Loop
-
- dst.x = floor(src.x)
- dst.y = floor(src.y)
- dst.z = floor(src.z)
-
- if (dst.y <= 0)
- pc = [matching ENDFOR] + 1
- endif
-
- Note: The destination must be a loop register.
- The source must be a constant register.
-
-
-1.9.9 REP - Repeat
-
- TBD
-
-
1.9.10 ELSE - Else
TBD
@@ -690,23 +671,6 @@ TGSI Instruction Specification
TBD
-1.9.12 ENDFOR - End a For-Loop
-
- dst.x = dst.x + dst.z
- dst.y = dst.y - 1.0
-
- if (dst.y > 0)
- pc = [matching BGNFOR instruction] + 1
- endif
-
- Note: The destination must be a loop register.
-
-
-1.9.13 ENDREP - End Repeat
-
- TBD
-
-
1.10 GL_NV_vertex_program3
---------------------------
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 57031419f8..8300020018 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -586,7 +586,6 @@ iter_instruction(
/* update indentation */
if (inst->Instruction.Opcode == TGSI_OPCODE_IF ||
inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
- inst->Instruction.Opcode == TGSI_OPCODE_BGNFOR ||
inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
ctx->indentation += indent_spaces;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 11045e4ba2..82eac05dc4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3186,14 +3186,6 @@ exec_instruction(
*pc = -1;
break;
- case TGSI_OPCODE_REP:
- assert (0);
- break;
-
- case TGSI_OPCODE_ENDREP:
- assert (0);
- break;
-
case TGSI_OPCODE_PUSHA:
assert (0);
break;
@@ -3258,29 +3250,6 @@ exec_instruction(
emit_primitive(mach);
break;
- case TGSI_OPCODE_BGNFOR:
- assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- for (chan_index = 0; chan_index < 3; chan_index++) {
- FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
- }
- ++mach->LoopCounterStackTop;
- STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
- /* update LoopMask */
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
- mach->LoopMask &= ~0x1;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
- mach->LoopMask &= ~0x2;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
- mach->LoopMask &= ~0x4;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
- mach->LoopMask &= ~0x8;
- }
- /* TODO: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- /* fall-through (for now) */
case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
@@ -3295,56 +3264,6 @@ exec_instruction(
mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
break;
- case TGSI_OPCODE_ENDFOR:
- assert(mach->LoopCounterStackTop > 0);
- micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
- /* update LoopMask */
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
- mach->LoopMask &= ~0x1;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
- mach->LoopMask &= ~0x2;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
- mach->LoopMask &= ~0x4;
- }
- if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
- mach->LoopMask &= ~0x8;
- }
- micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
- assert(mach->LoopLabelStackTop > 0);
- inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
- STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
- /* Restore ContMask, but don't pop */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
- UPDATE_EXEC_MASK(mach);
- if (mach->ExecMask) {
- /* repeat loop: jump to instruction just past BGNLOOP */
- assert(mach->LoopLabelStackTop > 0);
- *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
- }
- else {
- /* exit loop: pop LoopMask */
- assert(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- /* pop ContMask */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
- assert(mach->LoopLabelStackTop > 0);
- --mach->LoopLabelStackTop;
- assert(mach->LoopCounterStackTop > 0);
- --mach->LoopCounterStackTop;
-
- mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
- }
- UPDATE_EXEC_MASK(mach);
- break;
-
case TGSI_OPCODE_ENDLOOP:
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index de0e09cdba..cfa2f631bd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -106,12 +106,12 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL },
{ 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK },
{ 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF },
- { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR },
- { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP },
+ { 1, 1, 0, 0, 0, 1, "", 75 }, /* removed */
+ { 0, 1, 0, 0, 0, 1, "", 76 }, /* removed */
{ 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE },
{ 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF },
- { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR },
- { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP },
+ { 1, 0, 0, 0, 1, 0, "", 79 }, /* removed */
+ { 0, 0, 0, 0, 1, 0, "", 80 }, /* removed */
{ 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA },
{ 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA },
{ 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index e4af15c156..e472947507 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -111,12 +111,8 @@ OP12(DP2)
OP12_TEX(TXL)
OP00(BRK)
OP01_LBL(IF)
-OP11(BGNFOR)
-OP01(REP)
OP00_LBL(ELSE)
OP00(ENDIF)
-OP10(ENDFOR)
-OP00(ENDREP)
OP01(PUSHA)
OP10(POPA)
OP11(CEIL)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 371f690b29..76b7564cc3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -346,25 +346,6 @@ iter_instruction(
}
}
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_BGNFOR:
- case TGSI_OPCODE_ENDFOR:
- if (inst->Dst[0].Register.File != TGSI_FILE_LOOP ||
- inst->Dst[0].Register.Index != 0) {
- report_error(ctx, "Destination register must be LOOP[0]");
- }
- break;
- }
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_BGNFOR:
- if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT &&
- inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) {
- report_error(ctx, "Source register file must be either CONST or IMM");
- }
- break;
- }
-
ctx->num_instructions++;
return TRUE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index a85cc4659e..1071298b49 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2533,14 +2533,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_BGNFOR:
- return 0;
- break;
-
- case TGSI_OPCODE_REP:
- return 0;
- break;
-
case TGSI_OPCODE_ELSE:
return 0;
break;
@@ -2549,14 +2541,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_ENDFOR:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDREP:
- return 0;
- break;
-
case TGSI_OPCODE_PUSHA:
return 0;
break;
diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c
new file mode 100644
index 0000000000..048bd5c34d
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_caps.c
@@ -0,0 +1,244 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+#include "util/u_debug.h"
+#include "u_caps.h"
+
+/**
+ * Iterates over a list of caps checks as defined in u_caps.h.
+ *
+ * If all checks pass, returns TRUE and sets *out to the index of the
+ * list terminator. If a check fails (or its kind is unknown), returns
+ * FALSE and sets *out to the index where that check starts.
+ */
+boolean
+util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out)
+{
+   int i, tmpi;
+   float tmpf;
+
+   for (i = 0; list[i];) {
+      switch(list[i++]) {
+      case UTIL_CAPS_CHECK_CAP:
+         if (!screen->get_param(screen, list[i++])) {
+            *out = i - 2;
+            return FALSE;
+         }
+         break;
+      case UTIL_CAPS_CHECK_INT:
+         tmpi = screen->get_param(screen, list[i++]);
+         if (tmpi < (int)list[i++]) {
+            *out = i - 3;
+            return FALSE;
+         }
+         break;
+      case UTIL_CAPS_CHECK_FLOAT:
+         tmpf = screen->get_paramf(screen, list[i++]);
+         if (tmpf < (float)list[i++]) {
+            *out = i - 3;
+            return FALSE;
+         }
+         break;
+      case UTIL_CAPS_CHECK_FORMAT:
+         if (!screen->is_format_supported(screen, list[i++], PIPE_TEXTURE_2D,
+                                          PIPE_BIND_SAMPLER_VIEW, 0)) {
+            *out = i - 2;
+            return FALSE;
+         }
+         break; /* a supported format must not fall into UNIMPLEMENTED */
+      case UTIL_CAPS_CHECK_UNIMPLEMENTED:
+         *out = i - 1;
+         return FALSE;
+      default:
+         assert(!"Unsupported check");
+         *out = i - 1; /* report the unknown kind even when asserts are off */
+         return FALSE;
+      }
+   }
+
+   *out = i;
+   return TRUE;
+}
+
+/**
+ * Convenience wrapper around util_check_caps_out() for callers that only
+ * need the verdict: TRUE when every check in the list passes, else FALSE.
+ */
+boolean
+util_check_caps(struct pipe_screen *screen, const unsigned *list)
+{
+   int ignored_index;
+   return util_check_caps_out(screen, list, &ignored_index);
+}
+
+
+/*
+ * Below follow some demo lists.
+ *
+ * None of these lists is an exhaustive list of what is
+ * actually needed to support said API; they are here more
+ * as examples of how to use the above functions. This holds
+ * especially for DX10 and DX11, where Gallium is missing features.
+ */
+
+/* DX 9_1: minimum limits only. Trailing comments presumably give the max texture dimension implied by each level count. */
+static unsigned caps_dx_9_1[] = {
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 2),
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX 9_2: same texture limits as DX 9_1, plus occlusion query and separate blend equation; anisotropy minimum is 16. */
+static unsigned caps_dx_9_2[] = {
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX 9_3: requires SM3 and occlusion query, at least 4 render targets and 13 2D texture levels. */
+static unsigned caps_dx_9_3[] = {
+ UTIL_CHECK_CAP(SM3),
+ /* UTIL_CHECK_CAP(INSTANCING), -- this check is commented out */
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 4),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 13), /* 4096 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX 10: this list can never pass, because it contains UTIL_CHECK_UNIMPLEMENTED, which always fails. */
+static unsigned caps_dx_10[] = {
+ UTIL_CHECK_CAP(SM3),
+ /* UTIL_CHECK_CAP(INSTANCING), -- this check is commented out */
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 8192 */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 8192 */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */
+ UTIL_CHECK_TERMINATE
+};
+
+/* DX11: this list can never pass, because it contains UTIL_CHECK_UNIMPLEMENTED, which always fails. */
+static unsigned caps_dx_11[] = {
+ UTIL_CHECK_CAP(SM3),
+ /* UTIL_CHECK_CAP(INSTANCING), -- this check is commented out */
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8),
+ UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 16384 -- NOTE(review): 14 levels is annotated 8192 in caps_dx_10; confirm value or comment */
+ UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */
+ UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 16384 -- NOTE(review): 14 levels is annotated 8192 in caps_dx_10; confirm value or comment */
+ UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+ UTIL_CHECK_FORMAT(B8G8R8A8_UNORM),
+ UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */
+ UTIL_CHECK_TERMINATE
+};
+
+/* OpenGL 2.1: requires GLSL, occlusion query, two-sided stencil, separate blend equation and at least 2 render targets. */
+static unsigned caps_opengl_2_1[] = {
+ UTIL_CHECK_CAP(GLSL),
+ UTIL_CHECK_CAP(OCCLUSION_QUERY),
+ UTIL_CHECK_CAP(TWO_SIDED_STENCIL),
+ UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE),
+ UTIL_CHECK_INT(MAX_RENDER_TARGETS, 2),
+ UTIL_CHECK_TERMINATE
+};
+
+/* OpenGL 3.0 */
+/* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */
+
+
+/**
+ * Demo: for each API list, prints "yes" if the screen passes or the first failing check.
+ */
+void util_caps_demo_print(struct pipe_screen *screen)
+{
+   struct {
+      const char *name;
+      const unsigned *list;
+   } list[] = {
+      {"DX 9.1", caps_dx_9_1},
+      {"DX 9.2", caps_dx_9_2},
+      {"DX 9.3", caps_dx_9_3},
+      {"DX 10", caps_dx_10},
+      {"DX 11", caps_dx_11},
+      {"OpenGL 2.1", caps_opengl_2_1},
+/*    {"OpenGL 3.0", caps_opengl_3_0},*/
+      {NULL, NULL}
+   };
+   int i, out = 0;
+
+   for (i = 0; list[i].name; i++) {
+      if (util_check_caps_out(screen, list[i].list, &out)) {
+         debug_printf("%s: %s yes\n", __FUNCTION__, list[i].name);
+         continue;
+      }
+      switch (list[i].list[out]) { /* out indexes the kind of the failing check */
+      case UTIL_CAPS_CHECK_CAP:
+         debug_printf("%s: %s no (cap %u not supported)\n", __FUNCTION__,
+                      list[i].name,
+                      list[i].list[out + 1]);
+         break;
+      case UTIL_CAPS_CHECK_INT:
+         debug_printf("%s: %s no (cap %u less than %u)\n", __FUNCTION__,
+                      list[i].name,
+                      list[i].list[out + 1],
+                      list[i].list[out + 2]);
+         break;
+      case UTIL_CAPS_CHECK_FLOAT: /* the bound is stored in the list as an integer */
+         debug_printf("%s: %s no (cap %u less than %f)\n", __FUNCTION__,
+                      list[i].name,
+                      list[i].list[out + 1],
+                      (double)(int)list[i].list[out + 2]);
+         break;
+      case UTIL_CAPS_CHECK_FORMAT:
+         debug_printf("%s: %s no (format %s not supported)\n", __FUNCTION__,
+                      list[i].name,
+                      util_format_name(list[i].list[out + 1]) + 12); /* presumably skips "PIPE_FORMAT_" */
+         break;
+      case UTIL_CAPS_CHECK_UNIMPLEMENTED:
+         debug_printf("%s: %s no (not implemented in gallium or state tracker)\n",
+                      __FUNCTION__, list[i].name);
+         break;
+      default:
+         assert(!"Unsupported check");
+      }
+   }
+}
diff --git a/src/gallium/auxiliary/util/u_caps.h b/src/gallium/auxiliary/util/u_caps.h
new file mode 100644
index 0000000000..b1074f9eb2
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_caps.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Vmware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_CAPS_H
+#define U_CAPS_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_screen;
+
+enum u_caps_check_enum { /* kind tag that starts each entry of a caps check list */
+ UTIL_CAPS_CHECK_TERMINATE = 0, /* ends a check list; no operands */
+ UTIL_CAPS_CHECK_CAP, /* one operand: a PIPE_CAP_* value */
+ UTIL_CAPS_CHECK_INT, /* two operands: a PIPE_CAP_* value and a bound stored as unsigned */
+ UTIL_CAPS_CHECK_FLOAT, /* two operands: a PIPE_CAP_* value and a bound truncated to an integer */
+ UTIL_CAPS_CHECK_FORMAT, /* one operand: a PIPE_FORMAT_* value */
+ UTIL_CAPS_CHECK_UNIMPLEMENTED, /* no operands */
+};
+
+#define UTIL_CHECK_CAP(cap) \
+ UTIL_CAPS_CHECK_CAP, PIPE_CAP_##cap /* expands to two list elements */
+
+#define UTIL_CHECK_INT(cap, higher) \
+ UTIL_CAPS_CHECK_INT, PIPE_CAP_##cap, (unsigned)(higher) /* expands to three list elements */
+
+/* Floats currently lose precision: the bound is cast to int before being stored as unsigned. */
+#define UTIL_CHECK_FLOAT(cap, higher) \
+ UTIL_CAPS_CHECK_FLOAT, PIPE_CAP_##cap, (unsigned)(int)(higher) /* expands to three list elements */
+
+#define UTIL_CHECK_FORMAT(format) \
+ UTIL_CAPS_CHECK_FORMAT, PIPE_FORMAT_##format /* expands to two list elements */
+
+#define UTIL_CHECK_UNIMPLEMENTED \
+ UTIL_CAPS_CHECK_UNIMPLEMENTED /* expands to a single list element */
+
+#define UTIL_CHECK_TERMINATE \
+ UTIL_CAPS_CHECK_TERMINATE /* value 0; must be the last element of every list */
+
+boolean util_check_caps(struct pipe_screen *screen, const unsigned *list); /* list is built from the UTIL_CHECK_* macros */
+boolean util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out); /* also writes a list index to *out */
+void util_caps_demo_print(struct pipe_screen *screen); /* prints check results via debug output */
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index c134f13e90..2ce643e90c 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -656,12 +656,12 @@ util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state)
util_dump_struct_begin(stream, "pipe_transfer");
util_dump_member(stream, ptr, state, resource);
-// util_dump_member(stream, uint, state, box);
+ /*util_dump_member(stream, uint, state, box);*/
util_dump_member(stream, uint, state, stride);
util_dump_member(stream, uint, state, slice_stride);
-// util_dump_member(stream, ptr, state, data);
+ /*util_dump_member(stream, ptr, state, data);*/
util_dump_struct_end(stream);
}
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 605b13bd11..fb6ade5c06 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -332,7 +332,7 @@ util_format_name(enum pipe_format format)
assert(desc);
if (!desc) {
- return "???";
+ return "PIPE_FORMAT_???";
}
return desc->name;
diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c
index 66edb597fc..5b279b8fe2 100644
--- a/src/gallium/auxiliary/util/u_format_s3tc.c
+++ b/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -240,13 +240,14 @@ util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
util_format_dxtn_fetch_t fetch,
unsigned block_size)
{
+ const unsigned bw = 4, bh = 4, comps = 4;
unsigned x, y, i, j;
- for(y = 0; y < height; y += 4) {
+ for(y = 0; y < height; y += bh) {
const uint8_t *src = src_row;
- for(x = 0; x < width; x += 4) {
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ for(x = 0; x < width; x += bw) {
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps;
fetch(0, src, i, j, dst);
}
}
@@ -379,212 +380,197 @@ util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
void
util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
- const uint8_t *src_row, unsigned src_stride,
+ const uint8_t *src, unsigned src_stride,
unsigned width, unsigned height)
{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 8;
unsigned x, y, i, j, k;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
+ for(y = 0; y < height; y += bh) {
uint8_t *dst = dst_row;
- for(x = 0; x < width; x += 4) {
- uint8_t tmp[4][4][3];
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][3]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
for(k = 0; k < 3; ++k) {
- tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*4 + k];
}
}
}
- util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride);
- src += 4*4;
- dst += 8;
+ util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0);
+ dst += bytes_per_block;
}
- src_row += src_stride;
- dst_row += 4*dst_stride/sizeof(*dst_row);
+ dst_row += dst_stride / sizeof(*dst_row);
}
}
void
util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
- const uint8_t *src_row, unsigned src_stride,
+ const uint8_t *src, unsigned src_stride,
unsigned width, unsigned height)
{
+ const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 8;
unsigned x, y, i, j, k;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
+ for(y = 0; y < height; y += bh) {
uint8_t *dst = dst_row;
- for(x = 0; x < width; x += 4) {
- uint8_t tmp[4][4][4];
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < comps; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride);
- src += 4*4;
- dst += 8;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0);
+ dst += bytes_per_block;
}
- src_row += src_stride;
- dst_row += 4*dst_stride/sizeof(*dst_row);
+ dst_row += dst_stride / sizeof(*dst_row);
}
}
void
util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
- const uint8_t *src_row, unsigned src_stride,
+ const uint8_t *src, unsigned src_stride,
unsigned width, unsigned height)
{
+ const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16;
unsigned x, y, i, j, k;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
+ for(y = 0; y < height; y += bh) {
uint8_t *dst = dst_row;
- for(x = 0; x < width; x += 4) {
- uint8_t tmp[4][4][4];
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < comps; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride);
- src += 4*4;
- dst += 16;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0);
+ dst += bytes_per_block;
}
- src_row += src_stride;
- dst_row += 4*dst_stride/sizeof(*dst_row);
+ dst_row += dst_stride / sizeof(*dst_row);
}
}
void
util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
- const uint8_t *src_row, unsigned src_stride,
+ const uint8_t *src, unsigned src_stride,
unsigned width, unsigned height)
{
+ const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16;
unsigned x, y, i, j, k;
- for(y = 0; y < height; y += 4) {
- const uint8_t *src = src_row;
+
+ for(y = 0; y < height; y += bh) {
uint8_t *dst = dst_row;
- for(x = 0; x < width; x += 4) {
- uint8_t tmp[4][4][4];
- for(j = 0; j < 4; ++j) {
- for(i = 0; i < 4; ++i) {
- for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ for(k = 0; k < comps; ++k) {
+ tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride);
- src += 4*4;
- dst += 16;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0);
+ dst += bytes_per_block;
}
- src_row += src_stride;
- dst_row += 4*dst_stride/sizeof(*dst_row);
+ dst_row += dst_stride / sizeof(*dst_row);
}
}
void
util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
- const float *src_row, unsigned src_stride,
+ const float *src, unsigned src_stride,
unsigned width, unsigned height)
{
unsigned x, y, i, j, k;
for(y = 0; y < height; y += 4) {
- const float *src = src_row;
uint8_t *dst = dst_row;
for(x = 0; x < width; x += 4) {
uint8_t tmp[4][4][3];
for(j = 0; j < 4; ++j) {
for(i = 0; i < 4; ++i) {
for(k = 0; k < 3; ++k) {
- tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
}
}
}
- util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride);
- src += 4*4;
+ util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0);
dst += 8;
}
- src_row += src_stride;
dst_row += 4*dst_stride/sizeof(*dst_row);
}
}
void
util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
- const float *src_row, unsigned src_stride,
+ const float *src, unsigned src_stride,
unsigned width, unsigned height)
{
unsigned x, y, i, j, k;
for(y = 0; y < height; y += 4) {
- const float *src = src_row;
uint8_t *dst = dst_row;
for(x = 0; x < width; x += 4) {
uint8_t tmp[4][4][4];
for(j = 0; j < 4; ++j) {
for(i = 0; i < 4; ++i) {
for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride);
- src += 4*4;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0);
dst += 8;
}
- src_row += src_stride;
dst_row += 4*dst_stride/sizeof(*dst_row);
}
}
void
-util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
unsigned x, y, i, j, k;
for(y = 0; y < height; y += 4) {
- const float *src = src_row;
uint8_t *dst = dst_row;
for(x = 0; x < width; x += 4) {
uint8_t tmp[4][4][4];
for(j = 0; j < 4; ++j) {
for(i = 0; i < 4; ++i) {
for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride);
- src += 4*4;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0);
dst += 16;
}
- src_row += src_stride;
dst_row += 4*dst_stride/sizeof(*dst_row);
}
}
void
-util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+ const float *src, unsigned src_stride,
+ unsigned width, unsigned height)
{
unsigned x, y, i, j, k;
for(y = 0; y < height; y += 4) {
- const float *src = src_row;
uint8_t *dst = dst_row;
for(x = 0; x < width; x += 4) {
uint8_t tmp[4][4][4];
for(j = 0; j < 4; ++j) {
for(i = 0; i < 4; ++i) {
for(k = 0; k < 4; ++k) {
- tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+ tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
}
}
}
- util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride);
- src += 4*4;
+ util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0);
dst += 16;
}
- src_row += src_stride;
dst_row += 4*dst_stride/sizeof(*dst_row);
}
}
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index fe327c302b..f7aa1403d0 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -544,7 +544,7 @@ pipe_put_tile_z(struct pipe_context *pipe,
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
{
uint *pDest = (uint *) (map + y * pt->stride + x*4);
- //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);
+ /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 32-bit Z to 24-bit Z, preserve stencil */
@@ -571,7 +571,7 @@ pipe_put_tile_z(struct pipe_context *pipe,
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
{
uint *pDest = (uint *) (map + y * pt->stride + x*4);
- //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);
+ /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 32-bit Z to 24-bit Z, preserve stencil */