summary refs log tree commit diff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/draw/draw_private.h 5
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs.c 23
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_exec.c 3
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_ppc.c 1
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_sse.c 27
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_x86sse.c 9
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_x86sse.h 1
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.c 37
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.h 36
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_sse2.c 613
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_sse2.h 28
-rw-r--r-- src/gallium/drivers/softpipe/sp_fs_sse.c 24
-rw-r--r-- src/gallium/drivers/softpipe/sp_quad_fs.c 30
-rw-r--r-- src/gallium/state_trackers/egl/egl_context.c 12
-rw-r--r-- src/gallium/state_trackers/egl/egl_surface.c 11
-rw-r--r-- src/gallium/winsys/egl_xlib/egl_xlib.c 23
16 files changed, 543 insertions, 340 deletions
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 81e4eae401..41fcb16a0a 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -44,7 +44,6 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
-#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_scan.h"
@@ -55,6 +54,8 @@ struct draw_vertex_shader;
struct draw_context;
struct draw_stage;
struct vbuf_render;
+struct tgsi_exec_machine;
+struct tgsi_sampler;
/**
@@ -185,7 +186,7 @@ struct draw_context
uint position_output;
/** TGSI program interpreter runtime state */
- struct tgsi_exec_machine machine;
+ struct tgsi_exec_machine *machine;
uint num_samplers;
struct tgsi_sampler **samplers;
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index c057cd67fd..790e89ed82 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -43,6 +43,8 @@
#include "translate/translate.h"
#include "translate/translate_cache.h"
+#include "tgsi/tgsi_exec.h"
+
@@ -146,16 +148,8 @@ draw_delete_vertex_shader(struct draw_context *draw,
boolean
draw_vs_init( struct draw_context *draw )
{
- tgsi_exec_machine_init(&draw->vs.machine);
-
- /* FIXME: give this machine thing a proper constructor:
- */
- draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
- if (!draw->vs.machine.Inputs)
- return FALSE;
-
- draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
- if (!draw->vs.machine.Outputs)
+ draw->vs.machine = tgsi_exec_machine_create();
+ if (!draw->vs.machine)
return FALSE;
draw->vs.emit_cache = translate_cache_create();
@@ -178,12 +172,6 @@ draw_vs_init( struct draw_context *draw )
void
draw_vs_destroy( struct draw_context *draw )
{
- if (draw->vs.machine.Inputs)
- align_free(draw->vs.machine.Inputs);
-
- if (draw->vs.machine.Outputs)
- align_free(draw->vs.machine.Outputs);
-
if (draw->vs.fetch_cache)
translate_cache_destroy(draw->vs.fetch_cache);
@@ -196,8 +184,7 @@ draw_vs_destroy( struct draw_context *draw )
if (draw->vs.aligned_constant_storage)
align_free((void*)draw->vs.aligned_constant_storage);
- tgsi_exec_machine_free_data(&draw->vs.machine);
-
+ tgsi_exec_machine_destroy(draw->vs.machine);
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index f2368dde5c..41cc802613 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -41,6 +41,7 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_exec.h"
struct exec_vertex_shader {
@@ -201,7 +202,7 @@ draw_create_vs_exec(struct draw_context *draw,
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
vs->base.create_varient = draw_vs_varient_generic;
- vs->machine = &draw->vs.machine;
+ vs->machine = draw->vs.machine;
return &vs->base;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index d35db57d57..ad184bd696 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -48,6 +48,7 @@
#include "rtasm/rtasm_ppc.h"
#include "tgsi/tgsi_ppc.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 77ba5152f9..fb58983e01 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -48,27 +48,16 @@
#include "rtasm/rtasm_x86sse.h"
#include "tgsi/tgsi_sse2.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
#define SSE_MAX_VERTICES 4
-typedef void (PIPE_CDECL *codegen_function) (
- const struct tgsi_exec_vector *input, /* 1 */
- struct tgsi_exec_vector *output, /* 2 */
- float (*constant)[4], /* 3 */
- struct tgsi_exec_vector *temporary, /* 4 */
- float (*immediates)[4], /* 5 */
- const float (*aos_input)[4], /* 6 */
- uint num_inputs, /* 7 */
- uint input_stride, /* 8 */
- float (*aos_output)[4], /* 9 */
- uint num_outputs, /* 10 */
- uint output_stride ); /* 11 */
struct draw_sse_vertex_shader {
struct draw_vertex_shader base;
struct x86_function sse2_program;
- codegen_function func;
+ tgsi_sse2_vs_func func;
struct tgsi_exec_machine *machine;
};
@@ -118,11 +107,9 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
/* run compiled shader
*/
- shader->func(machine->Inputs,
- machine->Outputs,
- (float (*)[4])constants,
- machine->Temps,
- (float (*)[4])shader->base.immediates,
+ shader->func(machine,
+ constants,
+ shader->base.immediates,
input,
base->info.num_inputs,
input_stride,
@@ -184,7 +171,7 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
sizeof(float), 16);
- vs->machine = &draw->vs.machine;
+ vs->machine = draw->vs.machine;
x86_init_func( &vs->sse2_program );
@@ -194,7 +181,7 @@ draw_create_vs_sse(struct draw_context *draw,
TRUE ))
goto fail;
- vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
+ vs->func = (tgsi_sse2_vs_func) x86_get_func( &vs->sse2_program );
if (!vs->func) {
goto fail;
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 57fcf6de2a..1acf3c373e 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -993,6 +993,15 @@ void sse_pmovmskb( struct x86_function *p,
emit_modrm(p, dst, src);
}
+void sse_movmskps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src)
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, X86_TWOB, 0x50);
+ emit_modrm(p, dst, src);
+}
+
/***********************************************************************
* SSE2 instructions
*/
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 1b5eaaca85..731a651796 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -223,6 +223,7 @@ void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg sr
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 5cb322a5fa..fe571a86bc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -365,15 +365,26 @@ tgsi_exec_machine_bind_shader(
}
-void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach )
+struct tgsi_exec_machine *
+tgsi_exec_machine_create( void )
{
+ struct tgsi_exec_machine *mach;
uint i;
- mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
+ mach = align_malloc( sizeof *mach, 16 );
+ if (!mach)
+ goto fail;
+
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
+ mach->Samplers = NULL;
+ mach->Consts = NULL;
+ mach->Tokens = NULL;
+ mach->Primitives = NULL;
+ mach->InterpCoefs = NULL;
+ mach->Instructions = NULL;
+ mach->Declarations = NULL;
+
/* Setup constants. */
for( i = 0; i < 4; i++ ) {
mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
@@ -393,22 +404,24 @@ tgsi_exec_machine_init(
(void) print_chan;
(void) print_temp;
#endif
+
+ return mach;
+
+fail:
+ align_free(mach);
+ return NULL;
}
void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
+tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
{
- if (mach->Instructions) {
+ if (mach) {
FREE(mach->Instructions);
- mach->Instructions = NULL;
- mach->NumInstructions = 0;
- }
- if (mach->Declarations) {
FREE(mach->Declarations);
- mach->Declarations = NULL;
- mach->NumDeclarations = 0;
}
+
+ align_free(mach);
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index da22baad3e..8a9100f4c3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -29,6 +29,7 @@
#define TGSI_EXEC_H
#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
#if defined __cplusplus
extern "C" {
@@ -94,7 +95,6 @@ struct tgsi_exec_labels
#define TGSI_EXEC_NUM_TEMPS 128
-#define TGSI_EXEC_NUM_TEMP_EXTRAS 6
#define TGSI_EXEC_NUM_IMMEDIATES 256
/*
@@ -162,9 +162,14 @@ struct tgsi_exec_labels
#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3)
#define TGSI_EXEC_MASK_C 2
+/* 4 register buffer for various purposes */
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
+#define TGSI_EXEC_NUM_TEMP_R 4
+
+#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8)
+#define TGSI_EXEC_NUM_ADDRS 1
+#define TGSI_EXEC_NUM_TEMP_EXTRAS 9
-#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5)
#define TGSI_EXEC_MAX_COND_NESTING 20
@@ -187,24 +192,21 @@ struct tgsi_exec_labels
struct tgsi_exec_machine
{
/* Total = program temporaries + internal temporaries
- * + 1 padding to align to 16 bytes
*/
- struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS +
- TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
+ struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS +
+ TGSI_EXEC_NUM_TEMP_EXTRAS];
+
+ float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
+
+ struct tgsi_exec_vector Inputs[PIPE_MAX_ATTRIBS];
+ struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS];
- /*
- * This will point to _Temps after aligning to 16B boundary.
- */
- struct tgsi_exec_vector *Temps;
struct tgsi_exec_vector *Addrs;
struct tgsi_sampler **Samplers;
- float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
unsigned ImmLimit;
const float (*Consts)[4];
- struct tgsi_exec_vector *Inputs;
- struct tgsi_exec_vector *Outputs;
const struct tgsi_token *Tokens; /**< Declarations, instructions */
unsigned Processor; /**< TGSI_PROCESSOR_x */
@@ -251,9 +253,11 @@ struct tgsi_exec_machine
struct tgsi_exec_labels Labels;
};
+struct tgsi_exec_machine *
+tgsi_exec_machine_create( void );
+
void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach );
+tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach);
void
@@ -268,10 +272,6 @@ tgsi_exec_machine_run(
struct tgsi_exec_machine *mach );
-void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
-
-
static INLINE void
tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
{
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index ba2bfdef06..df49638d51 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -32,6 +32,7 @@
#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
+#include "util/u_memory.h"
#if defined(PIPE_ARCH_SSE)
#include "util/u_sse.h"
#endif
@@ -100,37 +101,55 @@ get_const_base( void )
{
return x86_make_reg(
file_REG32,
- reg_CX );
+ reg_AX );
}
static struct x86_reg
-get_input_base( void )
+get_machine_base( void )
{
return x86_make_reg(
file_REG32,
- reg_AX );
+ reg_CX );
+}
+
+static struct x86_reg
+get_input_base( void )
+{
+ return x86_make_disp(
+ get_machine_base(),
+ Offset(struct tgsi_exec_machine, Inputs) );
}
static struct x86_reg
get_output_base( void )
{
- return x86_make_reg(
- file_REG32,
- reg_DX );
+ return x86_make_disp(
+ get_machine_base(),
+ Offset(struct tgsi_exec_machine, Outputs) );
}
static struct x86_reg
get_temp_base( void )
{
+ return x86_make_disp(
+ get_machine_base(),
+ Offset(struct tgsi_exec_machine, Temps) );
+}
+
+static struct x86_reg
+get_coef_base( void )
+{
return x86_make_reg(
file_REG32,
reg_BX );
}
static struct x86_reg
-get_coef_base( void )
+get_sampler_base( void )
{
- return get_output_base();
+ return x86_make_reg(
+ file_REG32,
+ reg_DI );
}
static struct x86_reg
@@ -138,7 +157,7 @@ get_immediate_base( void )
{
return x86_make_reg(
file_REG32,
- reg_DI );
+ reg_DX );
}
@@ -168,6 +187,15 @@ get_const(
}
static struct x86_reg
+get_sampler_ptr(
+ unsigned unit )
+{
+ return x86_make_disp(
+ get_sampler_base(),
+ unit * sizeof( struct tgsi_sampler * ) );
+}
+
+static struct x86_reg
get_input(
unsigned vec,
unsigned chan )
@@ -520,24 +548,15 @@ emit_coef_dady(
* that the stack pointer is 16 byte aligned, as expected.
*/
static void
-emit_func_call_dst(
+emit_func_call(
struct x86_function *func,
- unsigned xmm_save,
- unsigned xmm_dst,
+ unsigned xmm_save_mask,
+ const struct x86_reg *arg,
+ unsigned nr_args,
void (PIPE_CDECL *code)() )
{
struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
unsigned i, n;
- unsigned xmm_mask;
-
- /* Bitmask of the xmm registers to save */
- xmm_mask = (1 << xmm_save) - 1;
- xmm_mask &= ~(1 << xmm_dst);
-
- sse_movaps(
- func,
- get_temp( TEMP_R0, 0 ),
- make_xmm( xmm_dst ) );
x86_push(
func,
@@ -549,8 +568,10 @@ emit_func_call_dst(
func,
x86_make_reg( file_REG32, reg_DX) );
+ /* Store XMM regs to the stack
+ */
for(i = 0, n = 0; i < 8; ++i)
- if(xmm_mask & (1 << i))
+ if(xmm_save_mask & (1 << i))
++n;
x86_sub_imm(
@@ -559,26 +580,42 @@ emit_func_call_dst(
n*16);
for(i = 0, n = 0; i < 8; ++i)
- if(xmm_mask & (1 << i)) {
+ if(xmm_save_mask & (1 << i)) {
sse_movups(
func,
x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
make_xmm( i ) );
++n;
}
+
+ for (i = 0; i < nr_args; i++) {
+ /* Load the address of the buffer we use for passing arguments and
+ * receiving results:
+ */
+ x86_lea(
+ func,
+ ecx,
+ arg[i] );
- x86_lea(
- func,
- ecx,
- get_temp( TEMP_R0, 0 ) );
-
- x86_push( func, ecx );
+ /* Push the address as a function argument. Args are pushed in array
+ * order, so arg[nr_args-1] is the callee's first parameter.
+ */
+ x86_push( func, ecx );
+ }
+
x86_mov_reg_imm( func, ecx, (unsigned long) code );
x86_call( func, ecx );
- x86_pop(func, ecx );
-
+
+ /* Pop the arguments (or just add an immediate to esp)
+ */
+ for (i = 0; i < nr_args; i++) {
+ x86_pop(func, ecx );
+ }
+
+ /* Pop the saved XMM regs:
+ */
for(i = 0, n = 0; i < 8; ++i)
- if(xmm_mask & (1 << i)) {
+ if(xmm_save_mask & (1 << i)) {
sse_movups(
func,
make_xmm( i ),
@@ -602,34 +639,86 @@ emit_func_call_dst(
x86_pop(
func,
x86_make_reg( file_REG32, reg_AX) );
+}
+
+static void
+emit_func_call_dst_src1(
+ struct x86_function *func,
+ unsigned xmm_save,
+ unsigned xmm_dst,
+ unsigned xmm_src0,
+ void (PIPE_CDECL *code)() )
+{
+ struct x86_reg store = get_temp( TEMP_R0, 0 );
+ unsigned xmm_mask = ((1 << xmm_save) - 1) & ~(1 << xmm_dst);
+
+ /* Store our input parameters (in xmm regs) to the buffer we use
+ * for passing arguments. We will pass a pointer to this buffer as
+ * the actual function argument.
+ */
+ sse_movaps(
+ func,
+ store,
+ make_xmm( xmm_src0 ) );
+
+ emit_func_call( func,
+ xmm_mask,
+ &store,
+ 1,
+ code );
sse_movaps(
func,
make_xmm( xmm_dst ),
- get_temp( TEMP_R0, 0 ) );
+ store );
}
+
static void
-emit_func_call_dst_src(
+emit_func_call_dst_src2(
struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst,
- unsigned xmm_src,
+ unsigned xmm_src0,
+ unsigned xmm_src1,
void (PIPE_CDECL *code)() )
{
+ struct x86_reg store = get_temp( TEMP_R0, 0 );
+ unsigned xmm_mask = ((1 << xmm_save) - 1) & ~(1 << xmm_dst);
+
+ /* Store two inputs to parameter buffer.
+ */
sse_movaps(
func,
- get_temp( TEMP_R0, 1 ),
- make_xmm( xmm_src ) );
+ store,
+ make_xmm( xmm_src0 ) );
- emit_func_call_dst(
+ sse_movaps(
func,
- xmm_save,
- xmm_dst,
- code );
+ x86_make_disp( store, 4 * sizeof(float) ),
+ make_xmm( xmm_src1 ) );
+
+
+ /* Emit the call
+ */
+ emit_func_call( func,
+ xmm_mask,
+ &store,
+ 1,
+ code );
+
+ /* Retrieve the results:
+ */
+ sse_movaps(
+ func,
+ make_xmm( xmm_dst ),
+ store );
}
+
+
+
#if defined(PIPE_ARCH_SSE)
/*
@@ -782,10 +871,11 @@ emit_cos(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
cos4f );
}
@@ -812,10 +902,11 @@ emit_ex2(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
ex24f );
}
@@ -857,10 +948,11 @@ emit_flr(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
flr4f );
}
@@ -880,10 +972,11 @@ emit_frc(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
frc4f );
}
@@ -910,10 +1003,11 @@ emit_lg2(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
lg24f );
}
@@ -975,13 +1069,15 @@ emit_pow(
struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst,
- unsigned xmm_src )
+ unsigned xmm_src0,
+ unsigned xmm_src1 )
{
- emit_func_call_dst_src(
+ emit_func_call_dst_src2(
func,
xmm_save,
xmm_dst,
- xmm_src,
+ xmm_src0,
+ xmm_src1,
pow4f );
}
@@ -1017,10 +1113,11 @@ emit_rnd(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
rnd4f );
}
@@ -1099,10 +1196,11 @@ emit_sgn(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
sgn4f );
}
@@ -1121,10 +1219,11 @@ emit_sin (struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst)
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
sin4f );
}
@@ -1140,6 +1239,12 @@ emit_sub(
make_xmm( xmm_src ) );
}
+
+
+
+
+
+
/**
* Register fetch.
*/
@@ -1298,20 +1403,164 @@ emit_store(
#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
+
+static void PIPE_CDECL
+fetch_texel( struct tgsi_sampler **sampler,
+ float *store )
+{
+#if 0
+ uint j;
+
+ debug_printf("%s sampler: %p (%p) store: %p\n",
+ __FUNCTION__,
+ sampler, *sampler,
+ store );
+
+ debug_printf("lodbias %f\n", store[12]);
+
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d texcoord %f %f\n",
+ j,
+ store[0+j],
+ store[4+j]);
+#endif
+
+ {
+ float rgba[NUM_CHANNELS][QUAD_SIZE];
+ (*sampler)->get_samples(*sampler,
+ &store[0],
+ &store[4],
+ &store[8],
+ 0.0f, /*store[12], lodbias */
+ rgba);
+
+ memcpy( store, rgba, 16 * sizeof(float));
+ }
+
+#if 0
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d result %f %f %f %f\n",
+ j,
+ store[0+j],
+ store[4+j],
+ store[8+j],
+ store[12+j]);
+#endif
+}
+
/**
* High-level instruction translators.
*/
static void
+emit_tex( struct x86_function *func,
+ const struct tgsi_full_instruction *inst,
+ boolean lodbias,
+ boolean projected)
+{
+ const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ struct x86_reg args[2];
+ unsigned count;
+ unsigned i;
+
+ switch (inst->InstructionExtTexture.Texture) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ count = 1;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ count = 2;
+ break;
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ count = 3;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ if (lodbias) {
+ FETCH( func, *inst, 3, 0, 3 );
+ }
+ else {
+ emit_tempf(
+ func,
+ 3,
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C );
+
+ }
+
+ /* Store the lodbias value whether enabled or not. Note that
+ * fetch_texel currently ignores it and passes 0.0f to get_samples().
+ */
+ sse_movaps( func,
+ get_temp( TEMP_R0, 3 ),
+ make_xmm( 3 ) );
+
+
+ if (projected) {
+ FETCH( func, *inst, 3, 0, 3 );
+
+ emit_rcp( func, 3, 3 );
+ }
+
+ for (i = 0; i < count; i++) {
+ FETCH( func, *inst, i, 0, i );
+
+ if (projected) {
+ sse_mulps(
+ func,
+ make_xmm( i ),
+ make_xmm( 3 ) );
+ }
+
+ /* Store in the argument buffer:
+ */
+ sse_movaps(
+ func,
+ get_temp( TEMP_R0, i ),
+ make_xmm( i ) );
+ }
+
+ args[0] = get_temp( TEMP_R0, 0 );
+ args[1] = get_sampler_ptr( unit );
+
+
+ emit_func_call( func,
+ 0,
+ args,
+ Elements(args),
+ fetch_texel );
+
+ /* If all four channels are enabled, could use a pointer to
+ * dst[0].x instead of TEMP_R0 for store?
+ */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, i ) {
+
+ sse_movaps(
+ func,
+ make_xmm( 0 ),
+ get_temp( TEMP_R0, i ) );
+
+ STORE( func, *inst, 0, 0, i );
+ }
+}
+
+
+static void
emit_kil(
struct x86_function *func,
const struct tgsi_full_src_register *reg )
{
unsigned uniquemask;
- unsigned registers[4];
- unsigned nextregister = 0;
- unsigned firstchan = ~0;
+ unsigned unique_count = 0;
unsigned chan_index;
+ unsigned i;
/* This mask stores component bits that were already tested. Note that
* we test if the value is less than zero, so 1.0 and 0.0 need not to be
@@ -1331,18 +1580,11 @@ emit_kil(
uniquemask |= 1 << swizzle;
/* allocate register */
- registers[chan_index] = nextregister;
emit_fetch(
func,
- nextregister,
+ unique_count++,
reg,
chan_index );
- nextregister++;
-
- /* mark the first channel used */
- if( firstchan == ~0 ) {
- firstchan = chan_index;
- }
}
}
@@ -1353,32 +1595,32 @@ emit_kil(
func,
x86_make_reg( file_REG32, reg_DX ) );
- FOR_EACH_CHANNEL( chan_index ) {
- if( uniquemask & (1 << chan_index) ) {
- sse_cmpps(
+ for (i = 0 ; i < unique_count; i++ ) {
+ struct x86_reg dataXMM = make_xmm(i);
+
+ sse_cmpps(
+ func,
+ dataXMM,
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ),
+ cc_LessThan );
+
+ if( i == 0 ) {
+ sse_movmskps(
func,
- make_xmm( registers[chan_index] ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ),
- cc_LessThan );
-
- if( chan_index == firstchan ) {
- sse_pmovmskb(
- func,
- x86_make_reg( file_REG32, reg_AX ),
- make_xmm( registers[chan_index] ) );
- }
- else {
- sse_pmovmskb(
- func,
- x86_make_reg( file_REG32, reg_DX ),
- make_xmm( registers[chan_index] ) );
- x86_or(
- func,
- x86_make_reg( file_REG32, reg_AX ),
- x86_make_reg( file_REG32, reg_DX ) );
- }
+ x86_make_reg( file_REG32, reg_AX ),
+ dataXMM );
+ }
+ else {
+ sse_movmskps(
+ func,
+ x86_make_reg( file_REG32, reg_DX ),
+ dataXMM );
+ x86_or(
+ func,
+ x86_make_reg( file_REG32, reg_AX ),
+ x86_make_reg( file_REG32, reg_DX ) );
}
}
@@ -1573,7 +1815,7 @@ emit_instruction(
get_temp(
TGSI_EXEC_TEMP_MINUS_128_I,
TGSI_EXEC_TEMP_MINUS_128_C ) );
- emit_pow( func, 3, 1, 2 );
+ emit_pow( func, 3, 1, 1, 2 );
FETCH( func, *inst, 0, 0, CHAN_X );
sse_xorps(
func,
@@ -1917,7 +2159,7 @@ emit_instruction(
/* TGSI_OPCODE_POW */
FETCH( func, *inst, 0, 0, CHAN_X );
FETCH( func, *inst, 1, 1, CHAN_X );
- emit_pow( func, 0, 0, 1 );
+ emit_pow( func, 0, 0, 0, 1 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -2086,21 +2328,7 @@ emit_instruction(
break;
case TGSI_OPCODE_TEX:
- if (0) {
- /* Disable dummy texture code:
- */
- emit_tempf(
- func,
- 0,
- TEMP_ONE_I,
- TEMP_ONE_C );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- }
- else {
- return 0;
- }
+ emit_tex( func, inst, FALSE, FALSE );
break;
case TGSI_OPCODE_TXD:
@@ -2198,7 +2426,7 @@ emit_instruction(
break;
case TGSI_OPCODE_TXB:
- return 0;
+ emit_tex( func, inst, TRUE, FALSE );
break;
case TGSI_OPCODE_NRM:
@@ -2306,9 +2534,13 @@ emit_instruction(
break;
case TGSI_OPCODE_TXL:
- return 0;
+ emit_tex( func, inst, TRUE, FALSE );
break;
+ case TGSI_OPCODE_TXP:
+ emit_tex( func, inst, FALSE, TRUE );
+ break;
+
case TGSI_OPCODE_BRK:
return 0;
break;
@@ -2488,7 +2720,7 @@ emit_declaration(
static void aos_to_soa( struct x86_function *func,
uint arg_aos,
- uint arg_soa,
+ uint arg_machine,
uint arg_num,
uint arg_stride )
{
@@ -2503,7 +2735,10 @@ static void aos_to_soa( struct x86_function *func,
x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) );
- x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) );
+ x86_mov( func, soa_input, x86_fn_arg( func, arg_machine ) );
+ x86_lea( func, soa_input,
+ x86_make_disp( soa_input,
+ Offset(struct tgsi_exec_machine, Inputs) ) );
x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
x86_mov( func, stride, x86_fn_arg( func, arg_stride ) );
@@ -2545,28 +2780,30 @@ static void aos_to_soa( struct x86_function *func,
x86_jcc( func, cc_NE, inner_loop );
/* Restore EBX */
- x86_pop( func, aos_input );
+ x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
}
-static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+static void soa_to_aos( struct x86_function *func,
+ uint arg_aos,
+ uint arg_machine,
+ uint arg_num,
+ uint arg_stride )
{
- struct x86_reg soa_output;
- struct x86_reg aos_output;
- struct x86_reg num_outputs;
- struct x86_reg temp;
+ struct x86_reg soa_output = x86_make_reg( file_REG32, reg_AX );
+ struct x86_reg aos_output = x86_make_reg( file_REG32, reg_BX );
+ struct x86_reg num_outputs = x86_make_reg( file_REG32, reg_CX );
+ struct x86_reg temp = x86_make_reg( file_REG32, reg_DX );
int inner_loop;
- soa_output = x86_make_reg( file_REG32, reg_AX );
- aos_output = x86_make_reg( file_REG32, reg_BX );
- num_outputs = x86_make_reg( file_REG32, reg_CX );
- temp = x86_make_reg( file_REG32, reg_DX );
-
/* Save EBX */
- x86_push( func, aos_output );
+ x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
- x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
- x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
- x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
+ x86_mov( func, aos_output, x86_fn_arg( func, arg_aos ) );
+ x86_mov( func, soa_output, x86_fn_arg( func, arg_machine ) );
+ x86_lea( func, soa_output,
+ x86_make_disp( soa_output,
+ Offset(struct tgsi_exec_machine, Outputs) ) );
+ x86_mov( func, num_outputs, x86_fn_arg( func, arg_num ) );
/* do */
inner_loop = x86_get_label( func );
@@ -2583,7 +2820,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
- x86_mov( func, temp, x86_fn_arg( func, stride ) );
+ x86_mov( func, temp, x86_fn_arg( func, arg_stride ) );
x86_push( func, aos_output );
sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
@@ -2607,20 +2844,13 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
x86_jcc( func, cc_NE, inner_loop );
/* Restore EBX */
- x86_pop( func, aos_output );
+ x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
}
/**
* Translate a TGSI vertex/fragment shader to SSE2 code.
* Slightly different things are done for vertex vs. fragment shaders.
*
- * Note that fragment shaders are responsible for interpolating shader
- * inputs. Because on x86 we have only 4 GP registers, and here we
- * have 5 shader arguments (input, output, const, temp and coef), the
- * code is split into two phases -- DECLARATION and INSTRUCTION phase.
- * GP register holding the output argument is aliased with the coeff
- * argument, as outputs are not needed in the DECLARATION phase.
- *
* \param tokens the TGSI input shader
* \param func the output SSE code/function
* \param immediates buffer to place immediates, later passed to SSE func
@@ -2634,7 +2864,6 @@ tgsi_emit_sse2(
boolean do_swizzles )
{
struct tgsi_parse_context parse;
- boolean instruction_phase = FALSE;
unsigned ok = 1;
uint num_immediates = 0;
@@ -2646,74 +2875,48 @@ tgsi_emit_sse2(
/* Can't just use EDI, EBX without save/restoring them:
*/
- x86_push(
- func,
- get_immediate_base() );
-
- x86_push(
- func,
- get_temp_base() );
-
+ x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+ x86_push( func, x86_make_reg( file_REG32, reg_DI ) );
/*
* Different function args for vertex/fragment shaders:
*/
- if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
- /* DECLARATION phase, do not load output argument. */
- x86_mov(
- func,
- get_input_base(),
- x86_fn_arg( func, 1 ) );
- /* skipping outputs argument here */
- x86_mov(
- func,
- get_const_base(),
- x86_fn_arg( func, 3 ) );
- x86_mov(
- func,
- get_temp_base(),
- x86_fn_arg( func, 4 ) );
- x86_mov(
- func,
- get_coef_base(),
- x86_fn_arg( func, 5 ) );
- x86_mov(
- func,
- get_immediate_base(),
- x86_fn_arg( func, 6 ) );
- }
- else {
- assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
-
+ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
if (do_swizzles)
aos_to_soa( func,
- 6, /* aos_input */
- 1, /* machine->input */
- 7, /* num_inputs */
- 8 ); /* input_stride */
+ 4, /* aos_input */
+ 1, /* machine */
+ 5, /* num_inputs */
+ 6 ); /* input_stride */
+ }
+ x86_mov(
+ func,
+ get_machine_base(),
+ x86_fn_arg( func, 1 ) );
+ x86_mov(
+ func,
+ get_const_base(),
+ x86_fn_arg( func, 2 ) );
+ x86_mov(
+ func,
+ get_immediate_base(),
+ x86_fn_arg( func, 3 ) );
+
+ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
x86_mov(
- func,
- get_input_base(),
- x86_fn_arg( func, 1 ) );
- x86_mov(
- func,
- get_output_base(),
- x86_fn_arg( func, 2 ) );
- x86_mov(
- func,
- get_const_base(),
- x86_fn_arg( func, 3 ) );
- x86_mov(
- func,
- get_temp_base(),
- x86_fn_arg( func, 4 ) );
+ func,
+ get_coef_base(),
+ x86_fn_arg( func, 4 ) );
+
x86_mov(
- func,
- get_immediate_base(),
- x86_fn_arg( func, 5 ) );
+ func,
+ get_sampler_base(),
+ x86_make_disp( get_machine_base(),
+ Offset( struct tgsi_exec_machine, Samplers ) ) );
}
+
while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
tgsi_parse_token( &parse );
@@ -2727,17 +2930,6 @@ tgsi_emit_sse2(
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
- if( !instruction_phase ) {
- /* INSTRUCTION phase, overwrite coeff with output. */
- instruction_phase = TRUE;
- x86_mov(
- func,
- get_output_base(),
- x86_fn_arg( func, 2 ) );
- }
- }
-
ok = emit_instruction(
func,
&parse.FullToken.FullInstruction );
@@ -2781,18 +2973,17 @@ tgsi_emit_sse2(
if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
if (do_swizzles)
- soa_to_aos( func, 9, 2, 10, 11 );
+ soa_to_aos( func,
+ 7, /* aos_output */
+ 1, /* machine */
+ 8, /* num_outputs */
+ 9 ); /* output_stride */
}
/* Can't just use EBX, EDI without save/restoring them:
*/
- x86_pop(
- func,
- get_temp_base() );
-
- x86_pop(
- func,
- get_immediate_base() );
+ x86_pop( func, x86_make_reg( file_REG32, reg_DI ) );
+ x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
emit_ret( func );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
index af838b2a25..d81ee3d00e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
@@ -34,6 +34,7 @@ extern "C" {
struct tgsi_token;
struct x86_function;
+struct tgsi_interp_coef;
unsigned
tgsi_emit_sse2(
@@ -42,6 +43,33 @@ tgsi_emit_sse2(
float (*immediates)[4],
boolean do_swizzles );
+
+/* This is the function prototype generated when do_swizzles is false
+ * -- effectively for fragment shaders.
+ */
+typedef void (PIPE_CDECL *tgsi_sse2_fs_function) (
+ struct tgsi_exec_machine *machine, /* 1 */
+ const float (*constant)[4], /* 2 */
+ const float (*immediate)[4], /* 3 */
+ const struct tgsi_interp_coef *coef /* 4 */
+ );
+
+
+/* This is the function prototype generated when do_swizzles is true
+ * -- effectively for vertex shaders.
+ */
+typedef void (PIPE_CDECL *tgsi_sse2_vs_func) (
+ struct tgsi_exec_machine *machine, /* 1 */
+ const float (*constant)[4], /* 2 */
+ const float (*immediate)[4], /* 3 */
+ const float (*aos_input)[4], /* 4 */
+ uint num_inputs, /* 5 */
+ uint input_stride, /* 6 */
+ float (*aos_output)[4], /* 7 */
+ uint num_outputs, /* 8 */
+ uint output_stride ); /* 9 */
+
+
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 31c3ca21c5..f4fa0905d7 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -45,17 +45,6 @@
#include "rtasm/rtasm_x86sse.h"
-/* Surely this should be defined somewhere in a tgsi header:
- */
-typedef void (PIPE_CDECL *codegen_function)(
- const struct tgsi_exec_vector *input,
- struct tgsi_exec_vector *output,
- const float (*constant)[4],
- struct tgsi_exec_vector *temporary,
- const struct tgsi_interp_coef *coef,
- float (*immediates)[4]
- //, const struct tgsi_exec_vector *quadPos
- );
/**
@@ -65,7 +54,7 @@ struct sp_sse_fragment_shader
{
struct sp_fragment_shader base;
struct x86_function sse2_program;
- codegen_function func;
+ tgsi_sse2_fs_function func;
float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
};
@@ -83,6 +72,7 @@ fs_sse_prepare( const struct sp_fragment_shader *base,
struct tgsi_exec_machine *machine,
struct tgsi_sampler **samplers )
{
+ machine->Samplers = samplers;
}
@@ -107,12 +97,10 @@ fs_sse_run( const struct sp_fragment_shader *base,
tgsi_set_kill_mask(machine, 0x0);
tgsi_set_exec_mask(machine, 1, 1, 1, 1);
- shader->func( machine->Inputs,
- machine->Outputs,
+ shader->func( machine,
machine->Consts,
- machine->Temps,
- machine->InterpCoefs,
- shader->immediates
+ (const float (*)[4])shader->immediates,
+ machine->InterpCoefs
// , &machine->QuadPos
);
@@ -151,7 +139,7 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe,
return NULL;
}
- shader->func = (codegen_function) x86_get_func( &shader->sse2_program );
+ shader->func = (tgsi_sse2_fs_function) x86_get_func( &shader->sse2_program );
if (!shader->func) {
x86_release_func( &shader->sse2_program );
FREE(shader);
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index ca637a1d6a..28f8d1a60e 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -52,7 +52,7 @@
struct quad_shade_stage
{
struct quad_stage stage; /**< base class */
- struct tgsi_exec_machine machine;
+ struct tgsi_exec_machine *machine;
struct tgsi_exec_vector *inputs, *outputs;
};
@@ -73,7 +73,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct quad_shade_stage *qss = quad_shade_stage( qs );
struct softpipe_context *softpipe = qs->softpipe;
- struct tgsi_exec_machine *machine = &qss->machine;
+ struct tgsi_exec_machine *machine = qss->machine;
boolean z_written;
/* Consts do not require 16 byte alignment. */
@@ -146,7 +146,7 @@ shade_begin(struct quad_stage *qs)
struct softpipe_context *softpipe = qs->softpipe;
softpipe->fs->prepare( softpipe->fs,
- &qss->machine,
+ qss->machine,
(struct tgsi_sampler **)
softpipe->tgsi.frag_samplers_list );
@@ -159,9 +159,8 @@ shade_destroy(struct quad_stage *qs)
{
struct quad_shade_stage *qss = (struct quad_shade_stage *) qs;
- tgsi_exec_machine_free_data(&qss->machine);
- FREE( qss->inputs );
- FREE( qss->outputs );
+ tgsi_exec_machine_destroy(qss->machine);
+
FREE( qs );
}
@@ -170,19 +169,24 @@ struct quad_stage *
sp_quad_shade_stage( struct softpipe_context *softpipe )
{
struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
-
- /* allocate storage for program inputs/outputs, aligned to 16 bytes */
- qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16);
- qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16);
- qss->machine.Inputs = align16(qss->inputs);
- qss->machine.Outputs = align16(qss->outputs);
+ if (!qss)
+ goto fail;
qss->stage.softpipe = softpipe;
qss->stage.begin = shade_begin;
qss->stage.run = shade_quad;
qss->stage.destroy = shade_destroy;
- tgsi_exec_machine_init( &qss->machine );
+ qss->machine = tgsi_exec_machine_create();
+ if (!qss->machine)
+ goto fail;
return &qss->stage;
+
+fail:
+ if (qss && qss->machine)
+ tgsi_exec_machine_destroy(qss->machine);
+
+ FREE(qss);
+ return NULL;
}
diff --git a/src/gallium/state_trackers/egl/egl_context.c b/src/gallium/state_trackers/egl/egl_context.c
index edd49486e5..f03a29582a 100644
--- a/src/gallium/state_trackers/egl/egl_context.c
+++ b/src/gallium/state_trackers/egl/egl_context.c
@@ -113,7 +113,7 @@ drm_create_context(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext
if (!ctx)
goto err_c;
- _eglInitContext(drv, dpy, &ctx->base, config, attrib_list);
+ _eglInitContext(drv, dpy, &ctx->base, conf, attrib_list);
ctx->pipe = dev->api->create_context(dev->api, dev->screen);
if (!ctx->pipe)
@@ -129,8 +129,8 @@ drm_create_context(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext
if (!ctx->st)
goto err_gl;
- /* generate handle and insert into hash table */
- _eglSaveContext(&ctx->base);
+ /* link to display */
+ _eglLinkContext(&ctx->base, _eglLookupDisplay(dpy));
assert(_eglGetContextHandle(&ctx->base));
return _eglGetContextHandle(&ctx->base);
@@ -147,10 +147,8 @@ EGLBoolean
drm_destroy_context(_EGLDriver *drv, EGLDisplay dpy, EGLContext context)
{
struct drm_context *c = lookup_drm_context(context);
- _eglRemoveContext(&c->base);
- if (c->base.IsBound) {
- c->base.DeletePending = EGL_TRUE;
- } else {
+ _eglUnlinkContext(&c->base);
+ if (!c->base.IsBound) {
st_destroy_context(c->st);
c->pipe->destroy(c->pipe);
free(c);
diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c
index de8194a46a..86f2ea97e5 100644
--- a/src/gallium/state_trackers/egl/egl_surface.c
+++ b/src/gallium/state_trackers/egl/egl_surface.c
@@ -232,7 +232,7 @@ drm_create_pbuffer_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config,
if (!surf)
goto err;
- if (!_eglInitSurface(drv, dpy, &surf->base, EGL_PBUFFER_BIT, config, attrib_list))
+ if (!_eglInitSurface(drv, &surf->base, EGL_PBUFFER_BIT, conf, attrib_list))
goto err_surf;
surf->w = width;
@@ -245,7 +245,7 @@ drm_create_pbuffer_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config,
(void*)surf);
drm_visual_modes_destroy(visual);
- _eglSaveSurface(&surf->base);
+ _eglLinkSurface(&surf->base, _eglLookupDisplay(dpy));
return surf->base.Handle;
err_surf:
@@ -364,10 +364,9 @@ EGLBoolean
drm_destroy_surface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface)
{
struct drm_surface *surf = lookup_drm_surface(surface);
- _eglRemoveSurface(&surf->base);
- if (surf->base.IsBound) {
- surf->base.DeletePending = EGL_TRUE;
- } else {
+ _eglUnlinkSurface(&surf->base);
+
+ if (!surf->base.IsBound) {
if (surf->screen)
drm_takedown_shown_screen(drv, surf->screen);
st_unreference_framebuffer(surf->stfb);
diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c
index c10e3c00ff..e1ddcae97b 100644
--- a/src/gallium/winsys/egl_xlib/egl_xlib.c
+++ b/src/gallium/winsys/egl_xlib/egl_xlib.c
@@ -345,7 +345,7 @@ xlib_eglCreateContext(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config,
return EGL_NO_CONTEXT;
/* let EGL lib init the common stuff */
- if (!_eglInitContext(drv, dpy, &ctx->Base, config, attrib_list)) {
+ if (!_eglInitContext(drv, &ctx->Base, conf, attrib_list)) {
free(ctx);
return EGL_NO_CONTEXT;
}
@@ -370,7 +370,7 @@ xlib_eglCreateContext(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config,
return EGL_NO_CONTEXT;
}
- _eglSaveContext(&ctx->Base);
+ _eglLinkContext(&ctx->Base, _eglLookupDisplay(dpy));
return _eglGetContextHandle(&ctx->Base);
}
@@ -381,10 +381,8 @@ xlib_eglDestroyContext(_EGLDriver *drv, EGLDisplay dpy, EGLContext ctx)
{
struct xlib_egl_context *context = lookup_context(ctx);
if (context) {
- if (context->Base.IsBound) {
- context->Base.DeletePending = EGL_TRUE;
- }
- else {
+ _eglUnlinkContext(&context->Base);
+ if (!context->Base.IsBound) {
/* API-dependent clean-up */
switch (context->Base.ClientAPI) {
case EGL_OPENGL_ES_API:
@@ -491,13 +489,13 @@ xlib_eglCreateWindowSurface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config,
return EGL_NO_SURFACE;
/* Let EGL lib init the common stuff */
- if (!_eglInitSurface(drv, dpy, &surf->Base, EGL_WINDOW_BIT,
- config, attrib_list)) {
+ if (!_eglInitSurface(drv, &surf->Base, EGL_WINDOW_BIT,
+ conf, attrib_list)) {
free(surf);
return EGL_NO_SURFACE;
}
- _eglSaveSurface(&surf->Base);
+ _eglLinkSurface(&surf->Base, disp);
/*
* Now init the Xlib and gallium stuff
@@ -534,11 +532,8 @@ xlib_eglDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface)
{
struct xlib_egl_surface *surf = lookup_surface(surface);
if (surf) {
- _eglHashRemove(_eglGlobal.Surfaces, (EGLuint) surface);
- if (surf->Base.IsBound) {
- surf->Base.DeletePending = EGL_TRUE;
- }
- else {
+ _eglUnlinkSurface(&surf->Base);
+ if (!surf->Base.IsBound) {
XFreeGC(surf->Dpy, surf->Gc);
st_unreference_framebuffer(surf->Framebuffer);
free(surf);