summary refs log tree commit diff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 2
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_varray.c | 1
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_vcache.c | 1
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_sse.c | 22
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 61
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 14
-rw-r--r-- src/gallium/auxiliary/sct/sct.c | 1
-rw-r--r-- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 59
-rwxr-xr-x src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 196
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 25
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 4
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 4
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 15
-rw-r--r-- src/gallium/auxiliary/translate/translate_sse.c | 2
-rw-r--r-- src/gallium/auxiliary/util/p_debug.c | 2
-rw-r--r-- src/gallium/auxiliary/util/u_blit.c | 2
-rw-r--r-- src/gallium/auxiliary/util/u_gen_mipmap.c | 3
17 files changed, 213 insertions, 201 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 3aa326acc7..4bca92ff11 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
uint size = 4;
immed = tgsi_default_full_immediate();
immed.Immediate.Size = 1 + size; /* one for the token itself */
- immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value;
+ immed.u.Pointer = (void *) value;
ctx->emit_immediate(ctx, &immed);
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index c85d8ded50..355093f945 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -200,7 +200,6 @@ static void varray_prepare(struct draw_pt_front_end *frontend,
unsigned opt)
{
struct varray_frontend *varray = (struct varray_frontend *)frontend;
- const struct pipe_rasterizer_state *rasterizer = varray->draw->rasterizer;
if (opt & PT_PIPELINE)
{
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 2f9775814f..6b3fb1406b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -225,7 +225,6 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
unsigned opt )
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- const struct pipe_rasterizer_state *rasterizer = vcache->draw->rasterizer;
if (opt & PT_PIPELINE)
{
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 07f85bc448..a57c938fbf 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -51,17 +51,17 @@
#if SSE_SWIZZLES
typedef void (XSTDCALL *codegen_function) (
- const struct tgsi_exec_vector *input,
- struct tgsi_exec_vector *output,
- float (*constant)[4],
- struct tgsi_exec_vector *temporary,
- float (*immediates)[4],
- const float (*aos_input)[4],
- uint num_inputs,
- uint input_stride,
- float (*aos_output)[4],
- uint num_outputs,
- uint output_stride );
+ const struct tgsi_exec_vector *input, /* 1 */
+ struct tgsi_exec_vector *output, /* 2 */
+ float (*constant)[4], /* 3 */
+ struct tgsi_exec_vector *temporary, /* 4 */
+ float (*immediates)[4], /* 5 */
+ const float (*aos_input)[4], /* 6 */
+ uint num_inputs, /* 7 */
+ uint input_stride, /* 8 */
+ float (*aos_output)[4], /* 9 */
+ uint num_outputs, /* 10 */
+ uint output_stride ); /* 11 */
#else
typedef void (XSTDCALL *codegen_function) (
const struct tgsi_exec_vector *input,
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index d7e2230557..4e036d9032 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -347,9 +347,9 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg )
return x86_make_reg( reg.file, reg.idx );
}
-unsigned char *x86_get_label( struct x86_function *p )
+int x86_get_label( struct x86_function *p )
{
- return p->csr;
+ return p->csr - p->store;
}
@@ -361,17 +361,22 @@ unsigned char *x86_get_label( struct x86_function *p )
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
- unsigned char *label )
+ int label )
{
- intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2);
+ int offset = label - (x86_get_label(p) + 2);
DUMP_I(cc);
+ if (offset < 0) {
+ int amt = p->csr - p->store;
+ assert(amt > -offset);
+ }
+
if (offset <= 127 && offset >= -128) {
emit_1ub(p, 0x70 + cc);
emit_1b(p, (char) offset);
}
else {
- offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6);
+ offset = label - (x86_get_label(p) + 6);
emit_2ub(p, 0x0f, 0x80 + cc);
emit_1i(p, offset);
}
@@ -379,8 +384,8 @@ void x86_jcc( struct x86_function *p,
/* Always use a 32bit offset for forward jumps:
*/
-unsigned char *x86_jcc_forward( struct x86_function *p,
- enum x86_cc cc )
+int x86_jcc_forward( struct x86_function *p,
+ enum x86_cc cc )
{
DUMP_I(cc);
emit_2ub(p, 0x0f, 0x80 + cc);
@@ -388,7 +393,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p,
return x86_get_label(p);
}
-unsigned char *x86_jmp_forward( struct x86_function *p)
+int x86_jmp_forward( struct x86_function *p)
{
DUMP();
emit_1ub(p, 0xe9);
@@ -396,7 +401,7 @@ unsigned char *x86_jmp_forward( struct x86_function *p)
return x86_get_label(p);
}
-unsigned char *x86_call_forward( struct x86_function *p)
+int x86_call_forward( struct x86_function *p)
{
DUMP();
@@ -408,42 +413,24 @@ unsigned char *x86_call_forward( struct x86_function *p)
/* Fixup offset from forward jump:
*/
void x86_fixup_fwd_jump( struct x86_function *p,
- unsigned char *fixup )
+ int fixup )
{
- *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup );
+ *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup;
}
-void x86_jmp( struct x86_function *p, unsigned char *label)
+void x86_jmp( struct x86_function *p, int label)
{
DUMP_I( label );
emit_1ub(p, 0xe9);
- emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4);
-}
-
-#if 0
-static unsigned char *cptr( void (*label)() )
-{
- return (unsigned char *) label;
+ emit_1i(p, label - x86_get_label(p) - 4);
}
-/* This doesn't work once we start reallocating & copying the
- * generated code on buffer fills, because the call is relative to the
- * current pc.
- */
-void x86_call( struct x86_function *p, void (*label)())
-{
- DUMP_I( label );
- emit_1ub(p, 0xe8);
- emit_1i(p, cptr(label) - x86_get_label(p) - 4);
-}
-#else
void x86_call( struct x86_function *p, struct x86_reg reg)
{
DUMP_R( reg );
emit_1ub(p, 0xff);
emit_modrm_noreg(p, 2, reg);
}
-#endif
/* michal:
@@ -462,8 +449,15 @@ void x86_push( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x50 + reg.idx);
+ if (reg.mod == mod_REG)
+ emit_1ub(p, 0x50 + reg.idx);
+ else
+ {
+ emit_1ub(p, 0xff);
+ emit_modrm_noreg(p, 6, reg);
+ }
+
+
p->stack_offset += 4;
}
@@ -495,6 +489,7 @@ void x86_dec( struct x86_function *p,
void x86_ret( struct x86_function *p )
{
DUMP();
+ assert(p->stack_offset == 0);
emit_1ub(p, 0xc3);
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index ad79b1facf..eacaeeaf6f 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -124,23 +124,23 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
/* Labels, jumps and fixup:
*/
-unsigned char *x86_get_label( struct x86_function *p );
+int x86_get_label( struct x86_function *p );
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
- unsigned char *label );
+ int label );
-unsigned char *x86_jcc_forward( struct x86_function *p,
+int x86_jcc_forward( struct x86_function *p,
enum x86_cc cc );
-unsigned char *x86_jmp_forward( struct x86_function *p);
+int x86_jmp_forward( struct x86_function *p);
-unsigned char *x86_call_forward( struct x86_function *p);
+int x86_call_forward( struct x86_function *p);
void x86_fixup_fwd_jump( struct x86_function *p,
- unsigned char *fixup );
+ int fixup );
-void x86_jmp( struct x86_function *p, unsigned char *label );
+void x86_jmp( struct x86_function *p, int label );
/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call( struct x86_function *p, struct x86_reg reg);
diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c
index 97ee5882a1..5e4126e014 100644
--- a/src/gallium/auxiliary/sct/sct.c
+++ b/src/gallium/auxiliary/sct/sct.c
@@ -209,6 +209,7 @@ remove_context_from_surface(struct sct_surface *si,
}
else {
prev = curr;
+ next = curr->next;
}
}
}
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index d55f907c0d..5d5125f7cb 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -1530,41 +1530,44 @@ exec_instruction(
break;
case TGSI_OPCODE_EXP:
- debug_printf("TGSI: EXP opcode not implemented\n");
- /* from ARB_v_p:
- tmp = ScalarLoad(op0);
- result.x = 2^floor(tmp);
- result.y = tmp - floor(tmp);
- result.z = RoughApprox2ToX(tmp);
- result.w = 1.0;
- */
-#if 0
- /* something like this: */
FETCH( &r[0], 0, CHAN_X );
- micro_exp2( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
+ STORE( &r[2], 0, CHAN_X ); /* store r2 */
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
+ STORE( &r[2], 0, CHAN_Y ); /* store r2 */
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
+ STORE( &r[2], 0, CHAN_Z ); /* store r2 */
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
-#endif
break;
case TGSI_OPCODE_LOG:
- debug_printf("TGSI: LOG opcode not implemented\n");
- /* from ARB_v_p:
- tmp = fabs(ScalarLoad(op0));
- result.x = floor(log2(tmp));
- result.y = tmp / 2^(floor(log2(tmp)));
- result.z = RoughApproxLog2(tmp);
- result.w = 1.0;
- */
-#if 0
- /* something like this: */
FETCH( &r[0], 0, CHAN_X );
- micro_lg2( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
+ micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
+ micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &r[0], 0, CHAN_X );
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
+ micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
+ STORE( &r[0], 0, CHAN_Y );
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ STORE( &r[1], 0, CHAN_Z );
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
-#endif
break;
case TGSI_OPCODE_MUL:
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 1138f59997..2fd76a3072 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -103,15 +103,9 @@ get_output_base( void )
static struct x86_reg
get_temp_base( void )
{
-#ifdef WIN32
return x86_make_reg(
file_REG32,
reg_BX );
-#else
- return x86_make_reg(
- file_REG32,
- reg_SI );
-#endif
}
static struct x86_reg
@@ -133,14 +127,6 @@ get_immediate_base( void )
* Data access helpers.
*/
-static struct x86_reg
-get_argument(
- unsigned index )
-{
- return x86_make_disp(
- x86_make_reg( file_REG32, reg_SP ),
- (index + 1) * 4 );
-}
static struct x86_reg
get_immediate(
@@ -455,19 +441,13 @@ emit_push_gp(
{
x86_push(
func,
- get_const_base() );
+ x86_make_reg( file_REG32, reg_AX) );
x86_push(
func,
- get_input_base() );
+ x86_make_reg( file_REG32, reg_CX) );
x86_push(
func,
- get_output_base() );
-
- /* It is important on non-win32 platforms that temp base is pushed last.
- */
- x86_push(
- func,
- get_temp_base() );
+ x86_make_reg( file_REG32, reg_DX) );
}
static void
@@ -478,16 +458,13 @@ x86_pop_gp(
*/
x86_pop(
func,
- get_temp_base() );
- x86_pop(
- func,
- get_output_base() );
+ x86_make_reg( file_REG32, reg_DX) );
x86_pop(
func,
- get_input_base() );
+ x86_make_reg( file_REG32, reg_CX) );
x86_pop(
func,
- get_const_base() );
+ x86_make_reg( file_REG32, reg_AX) );
}
static void
@@ -504,19 +481,23 @@ emit_func_call_dst(
emit_push_gp(
func );
-#ifdef WIN32
- x86_push(
- func,
- get_temp( TEMP_R0, 0 ) );
-#endif
-
{
struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+ x86_lea(
+ func,
+ ecx,
+ get_temp( TEMP_R0, 0 ) );
+
+ x86_push( func, ecx );
x86_mov_reg_imm( func, ecx, (unsigned long) code );
x86_call( func, ecx );
+#ifndef WIN32
+ x86_pop(func, ecx );
+#endif
}
+
x86_pop_gp(
func );
@@ -577,11 +558,7 @@ static void XSTDCALL
cos4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
store[X + 0] = cosf( store[X + 0] );
store[X + 1] = cosf( store[X + 1] );
@@ -604,11 +581,8 @@ static void XSTDCALL
ex24f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = powf( 2.0f, store[X + 0] );
store[X + 1] = powf( 2.0f, store[X + 1] );
store[X + 2] = powf( 2.0f, store[X + 2] );
@@ -641,11 +615,8 @@ static void XSTDCALL
flr4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = floorf( store[X + 0] );
store[X + 1] = floorf( store[X + 1] );
store[X + 2] = floorf( store[X + 2] );
@@ -667,11 +638,8 @@ static void XSTDCALL
frc4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] -= floorf( store[X + 0] );
store[X + 1] -= floorf( store[X + 1] );
store[X + 2] -= floorf( store[X + 2] );
@@ -693,11 +661,8 @@ static void XSTDCALL
lg24f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = LOG2( store[X + 0] );
store[X + 1] = LOG2( store[X + 1] );
store[X + 2] = LOG2( store[X + 2] );
@@ -755,11 +720,8 @@ static void XSTDCALL
pow4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = powf( store[X + 0], store[X + 4] );
store[X + 1] = powf( store[X + 1], store[X + 5] );
store[X + 2] = powf( store[X + 2], store[X + 6] );
@@ -800,11 +762,8 @@ static void XSTDCALL
rsqrt4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = 1.0F / sqrtf( store[X + 0] );
store[X + 1] = 1.0F / sqrtf( store[X + 1] );
store[X + 2] = 1.0F / sqrtf( store[X + 2] );
@@ -878,11 +837,8 @@ static void XSTDCALL
sin4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = sinf( store[X + 0] );
store[X + 1] = sinf( store[X + 1] );
store[X + 2] = sinf( store[X + 2] );
@@ -1234,11 +1190,16 @@ emit_instruction(
switch( inst->Instruction.Opcode ) {
case TGSI_OPCODE_ARL:
+#if 0
+ /* XXX this isn't working properly (see glean vertProg1 test) */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
emit_f2it( func, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
+#else
+ return 0;
+#endif
break;
case TGSI_OPCODE_MOV:
@@ -2029,40 +1990,40 @@ emit_declaration(
}
}
-static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+static void aos_to_soa( struct x86_function *func,
+ uint arg_aos,
+ uint arg_soa,
+ uint arg_num,
+ uint arg_stride )
{
- struct x86_reg soa_input;
- struct x86_reg aos_input;
- struct x86_reg num_inputs;
- struct x86_reg temp;
- unsigned char *inner_loop;
+ struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
+ struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
+ struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
+ struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
+ int inner_loop;
- soa_input = x86_make_reg( file_REG32, reg_AX );
- aos_input = x86_make_reg( file_REG32, reg_BX );
- num_inputs = x86_make_reg( file_REG32, reg_CX );
- temp = x86_make_reg( file_REG32, reg_DX );
/* Save EBX */
x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
- x86_mov( func, soa_input, get_argument( soa + 1 ) );
- x86_mov( func, aos_input, get_argument( aos + 1 ) );
- x86_mov( func, num_inputs, get_argument( num + 1 ) );
+ x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) );
+ x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) );
+ x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
+ x86_mov( func, stride, x86_fn_arg( func, arg_stride ) );
/* do */
inner_loop = x86_get_label( func );
{
- x86_mov( func, temp, get_argument( stride + 1 ) );
x86_push( func, aos_input );
sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
- x86_add( func, aos_input, temp );
+ x86_add( func, aos_input, stride );
sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
- x86_add( func, aos_input, temp );
+ x86_add( func, aos_input, stride );
sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
- x86_add( func, aos_input, temp );
+ x86_add( func, aos_input, stride );
sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
x86_pop( func, aos_input );
@@ -2088,7 +2049,7 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num,
x86_jcc( func, cc_NE, inner_loop );
/* Restore EBX */
- x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
+ x86_pop( func, aos_input );
}
static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
@@ -2097,7 +2058,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
struct x86_reg aos_output;
struct x86_reg num_outputs;
struct x86_reg temp;
- unsigned char *inner_loop;
+ int inner_loop;
soa_output = x86_make_reg( file_REG32, reg_AX );
aos_output = x86_make_reg( file_REG32, reg_BX );
@@ -2105,11 +2066,11 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
temp = x86_make_reg( file_REG32, reg_DX );
/* Save EBX */
- x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+ x86_push( func, aos_output );
- x86_mov( func, soa_output, get_argument( soa + 1 ) );
- x86_mov( func, aos_output, get_argument( aos + 1 ) );
- x86_mov( func, num_outputs, get_argument( num + 1 ) );
+ x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
+ x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
+ x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
/* do */
inner_loop = x86_get_label( func );
@@ -2126,7 +2087,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
- x86_mov( func, temp, get_argument( stride + 1 ) );
+ x86_mov( func, temp, x86_fn_arg( func, stride ) );
x86_push( func, aos_output );
sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
@@ -2150,7 +2111,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
x86_jcc( func, cc_NE, inner_loop );
/* Restore EBX */
- x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
+ x86_pop( func, aos_output );
}
/**
@@ -2185,6 +2146,17 @@ tgsi_emit_sse2(
tgsi_parse_init( &parse, tokens );
+ /* Can't just use EDI, EBX without save/restoring them:
+ */
+ x86_push(
+ func,
+ get_immediate_base() );
+
+ x86_push(
+ func,
+ get_temp_base() );
+
+
/*
* Different function args for vertex/fragment shaders:
*/
@@ -2193,51 +2165,55 @@ tgsi_emit_sse2(
x86_mov(
func,
get_input_base(),
- get_argument( 0 ) );
+ x86_fn_arg( func, 1 ) );
/* skipping outputs argument here */
x86_mov(
func,
get_const_base(),
- get_argument( 2 ) );
+ x86_fn_arg( func, 3 ) );
x86_mov(
func,
get_temp_base(),
- get_argument( 3 ) );
+ x86_fn_arg( func, 4 ) );
x86_mov(
func,
get_coef_base(),
- get_argument( 4 ) );
+ x86_fn_arg( func, 5 ) );
x86_mov(
func,
get_immediate_base(),
- get_argument( 5 ) );
+ x86_fn_arg( func, 6 ) );
}
else {
assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
if (do_swizzles)
- aos_to_soa( func, 5, 0, 6, 7 );
+ aos_to_soa( func,
+ 6, /* aos_input */
+ 1, /* machine->input */
+ 7, /* num_inputs */
+ 8 ); /* input_stride */
x86_mov(
func,
get_input_base(),
- get_argument( 0 ) );
+ x86_fn_arg( func, 1 ) );
x86_mov(
func,
get_output_base(),
- get_argument( 1 ) );
+ x86_fn_arg( func, 2 ) );
x86_mov(
func,
get_const_base(),
- get_argument( 2 ) );
+ x86_fn_arg( func, 3 ) );
x86_mov(
func,
get_temp_base(),
- get_argument( 3 ) );
+ x86_fn_arg( func, 4 ) );
x86_mov(
func,
get_immediate_base(),
- get_argument( 4 ) );
+ x86_fn_arg( func, 5 ) );
}
while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
@@ -2260,7 +2236,7 @@ tgsi_emit_sse2(
x86_mov(
func,
get_output_base(),
- get_argument( 1 ) );
+ x86_fn_arg( func, 2 ) );
}
}
@@ -2307,9 +2283,19 @@ tgsi_emit_sse2(
if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
if (do_swizzles)
- soa_to_aos( func, 8, 1, 9, 10 );
+ soa_to_aos( func, 9, 2, 10, 11 );
}
+ /* Can't just use EBX, EDI without save/restoring them:
+ */
+ x86_pop(
+ func,
+ get_temp_base() );
+
+ x86_pop(
+ func,
+ get_immediate_base() );
+
#ifdef WIN32
emit_retw( func, 16 );
#else
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 26bfc2051f..4c65ffd780 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -767,6 +767,31 @@ dump_instruction_short(
SID( dst->DstRegister.Index );
CHR( ']' );
+ switch (dst->DstRegisterExtModulate.Modulate) {
+ case TGSI_MODULATE_1X:
+ break;
+ case TGSI_MODULATE_2X:
+ TXT( "_2X" );
+ break;
+ case TGSI_MODULATE_4X:
+ TXT( "_4X" );
+ break;
+ case TGSI_MODULATE_8X:
+ TXT( "_8X" );
+ break;
+ case TGSI_MODULATE_HALF:
+ TXT( "_D2" );
+ break;
+ case TGSI_MODULATE_QUARTER:
+ TXT( "_D4" );
+ break;
+ case TGSI_MODULATE_EIGHTH:
+ TXT( "_D8" );
+ break;
+ default:
+ assert( 0 );
+ }
+
if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) {
CHR( '.' );
if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) {
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
index c3526cb71f..5bea773840 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
@@ -43,7 +43,7 @@ tgsi_full_token_free(
union tgsi_full_token *full_token )
{
if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
- FREE( full_token->FullImmediate.u.Pointer );
+ FREE( (void *) full_token->FullImmediate.u.Pointer );
}
}
@@ -156,7 +156,7 @@ tgsi_parse_token(
imm->u.Pointer = MALLOC(
sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
- next_token( ctx, &imm->u.ImmediateFloat32[i] );
+ next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] );
}
break;
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index da0121c482..15e76feb7c 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -52,8 +52,8 @@ struct tgsi_full_immediate
struct tgsi_immediate Immediate;
union
{
- void *Pointer;
- struct tgsi_immediate_float32 *ImmediateFloat32;
+ const void *Pointer;
+ const struct tgsi_immediate_float32 *ImmediateFloat32;
} u;
};
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index ea4a72967d..65650ed22a 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -103,18 +103,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->file_max[file] = MAX2(info->file_max[file], (int)i);
if (file == TGSI_FILE_INPUT) {
- info->input_semantic_name[info->num_inputs]
- = (ubyte)fulldecl->Semantic.SemanticName;
- info->input_semantic_index[info->num_inputs]
- = (ubyte)fulldecl->Semantic.SemanticIndex;
+ info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
+ info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
info->num_inputs++;
}
if (file == TGSI_FILE_OUTPUT) {
- info->output_semantic_name[info->num_outputs]
- = (ubyte)fulldecl->Semantic.SemanticName;
- info->output_semantic_index[info->num_outputs]
- = (ubyte)fulldecl->Semantic.SemanticIndex;
+ info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
+ info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
info->num_outputs++;
}
@@ -137,6 +133,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}
}
+ assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs );
+ assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs );
+
info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
info->opcode_count[TGSI_OPCODE_KILP]);
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index f590d48b78..a54ac5a82f 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -404,7 +404,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
struct x86_reg srcEAX = x86_make_reg(file_REG32, reg_CX);
struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP);
struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
- uint8_t *fixup, *label;
+ int fixup, label;
unsigned j;
p->func = func;
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c
index f1fb07bf5b..4ec1746662 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/p_debug.c
@@ -422,4 +422,4 @@ void debug_print_format(const char *msg, unsigned fmt )
debug_printf("%s: %s\n", msg, fmtstr);
}
-#endif
\ No newline at end of file
+#endif
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 257473ab26..b70bcbfa66 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -296,6 +296,8 @@ util_blit_pixels(struct blit_state *ctx,
src, srcLeft, srcTop, /* src */
srcW, srcH); /* size */
+ pipe->texture_update(pipe, tex, 0, 1 << 0);
+
/* save state (restored below) */
cso_save_blend(ctx->cso);
cso_save_depth_stencil_alpha(ctx->cso);
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 6ed5503c9a..056ae829ae 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -505,6 +505,9 @@ format_to_type_comps(enum pipe_format pformat,
return;
default:
assert(0);
+ *datatype = UBYTE;
+ *comps = 0;
+ break;
}
}