From 568fcf64c774f5f8e9f65bb86c121f5d550b1632 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 13 Dec 2007 20:38:56 +0000 Subject: 965: get fragment shader compiler compiling Don't think that it will run though. --- src/mesa/pipe/i915simple/i915_texture.c | 4 - src/mesa/pipe/i965simple/Makefile | 2 + src/mesa/pipe/i965simple/brw_batch.h | 3 +- src/mesa/pipe/i965simple/brw_cc.c | 2 +- src/mesa/pipe/i965simple/brw_clip_state.c | 4 +- src/mesa/pipe/i965simple/brw_context.c | 1 - src/mesa/pipe/i965simple/brw_context.h | 99 +- src/mesa/pipe/i965simple/brw_curbe.c | 79 +- src/mesa/pipe/i965simple/brw_eu.h | 11 + src/mesa/pipe/i965simple/brw_eu_emit.c | 18 +- src/mesa/pipe/i965simple/brw_gs_state.c | 3 +- src/mesa/pipe/i965simple/brw_misc_state.c | 6 +- src/mesa/pipe/i965simple/brw_sf.c | 213 ++- src/mesa/pipe/i965simple/brw_sf.h | 17 +- src/mesa/pipe/i965simple/brw_sf_emit.c | 340 +--- src/mesa/pipe/i965simple/brw_sf_state.c | 124 +- src/mesa/pipe/i965simple/brw_shader_info.c | 49 + src/mesa/pipe/i965simple/brw_state.c | 7 + src/mesa/pipe/i965simple/brw_state.h | 7 + src/mesa/pipe/i965simple/brw_state_cache.c | 5 +- src/mesa/pipe/i965simple/brw_state_pool.c | 9 +- src/mesa/pipe/i965simple/brw_state_upload.c | 4 +- src/mesa/pipe/i965simple/brw_tex_layout.c | 26 +- src/mesa/pipe/i965simple/brw_vs_state.c | 2 +- src/mesa/pipe/i965simple/brw_wm.c | 28 +- src/mesa/pipe/i965simple/brw_wm.h | 184 +- src/mesa/pipe/i965simple/brw_wm_decl.c | 377 ++++ src/mesa/pipe/i965simple/brw_wm_glsl.c | 2141 ++++++++++------------- src/mesa/pipe/i965simple/brw_wm_sampler_state.c | 9 +- src/mesa/pipe/i965simple/brw_wm_state.c | 50 +- src/mesa/pipe/p_util.h | 5 + src/mesa/pipe/softpipe/sp_texture.c | 4 - 32 files changed, 1795 insertions(+), 2038 deletions(-) create mode 100644 src/mesa/pipe/i965simple/brw_shader_info.c create mode 100644 src/mesa/pipe/i965simple/brw_wm_decl.c (limited to 'src/mesa') diff --git a/src/mesa/pipe/i915simple/i915_texture.c b/src/mesa/pipe/i915simple/i915_texture.c index fefd105adf..44f72e63cc 100644 --- a/src/mesa/pipe/i915simple/i915_texture.c +++ b/src/mesa/pipe/i915simple/i915_texture.c @@ -47,10 +47,6 @@ static unsigned minify( unsigned d ) return MAX2(1, d>>1); } -static int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} static void diff --git a/src/mesa/pipe/i965simple/Makefile b/src/mesa/pipe/i965simple/Makefile index 21f40f72a0..48c00ab50b 100644 --- a/src/mesa/pipe/i965simple/Makefile +++ b/src/mesa/pipe/i965simple/Makefile @@ -31,6 +31,7 @@ DRIVER_SOURCES = \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ + brw_shader_info.c \ brw_state.c \ brw_state_batch.c \ brw_state_cache.c \ @@ -44,6 +45,7 @@ DRIVER_SOURCES = \ brw_vs_state.c \ brw_wm.c \ brw_wm_iz.c \ + brw_wm_decl.c \ brw_wm_glsl.c \ brw_wm_sampler_state.c \ brw_wm_state.c \ diff --git a/src/mesa/pipe/i965simple/brw_batch.h b/src/mesa/pipe/i965simple/brw_batch.h index 7c778f360b..bef69ac871 100644 --- a/src/mesa/pipe/i965simple/brw_batch.h +++ b/src/mesa/pipe/i965simple/brw_batch.h @@ -36,7 +36,7 @@ #define INTEL_BATCH_CLIPRECTS 0x2 #define BEGIN_BATCH( dwords, relocs ) \ - (brw->batch_start = brw->winsys->batch_start(brw->winsys, dwords, relocs)) + brw->winsys->batch_start(brw->winsys, dwords, relocs) #define OUT_BATCH( dword ) \ brw->winsys->batch_dword(brw->winsys, dword) @@ -50,7 +50,6 @@ */ #define FLUSH_BATCH(fence) do { \ brw->winsys->batch_flush(brw->winsys, fence); \ - brw->batch_start = NULL; \ brw->hardware_dirty = ~0; \ } while (0) diff --git a/src/mesa/pipe/i965simple/brw_cc.c b/src/mesa/pipe/i965simple/brw_cc.c index fc7fdba53f..6cc1505311 100644 --- a/src/mesa/pipe/i965simple/brw_cc.c +++ b/src/mesa/pipe/i965simple/brw_cc.c @@ -142,7 +142,7 @@ static void upload_cc_vp( struct brw_context *brw ) const struct brw_tracked_state brw_cc_vp = { .dirty = { - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_SCENE, .cache = 0 }, .update = upload_cc_vp diff --git a/src/mesa/pipe/i965simple/brw_clip_state.c b/src/mesa/pipe/i965simple/brw_clip_state.c index 51a4666a0b..ea5c05a279 100644 --- a/src/mesa/pipe/i965simple/brw_clip_state.c +++ b/src/mesa/pipe/i965simple/brw_clip_state.c @@ -32,7 +32,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" - +#include "pipe/p_util.h" static void upload_clip_unit( struct brw_context *brw ) @@ -43,7 +43,7 @@ static void upload_clip_unit( struct brw_context *brw ) /* CACHE_NEW_CLIP_PROG */ clip.thread0.grf_reg_count = - ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1; + align(brw->clip.prog_data->total_grf, 16) / 16 - 1; clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; diff --git a/src/mesa/pipe/i965simple/brw_context.c b/src/mesa/pipe/i965simple/brw_context.c index e69ba6938e..5e58701e91 100644 --- a/src/mesa/pipe/i965simple/brw_context.c +++ b/src/mesa/pipe/i965simple/brw_context.c @@ -237,7 +237,6 @@ struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys, brw->pci_id = pci_id; brw->dirty = ~0; brw->hardware_dirty = ~0; - brw->batch_start = NULL; memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); diff --git a/src/mesa/pipe/i965simple/brw_context.h b/src/mesa/pipe/i965simple/brw_context.h index 20528f00b3..318c6a7049 100644 --- a/src/mesa/pipe/i965simple/brw_context.h +++ b/src/mesa/pipe/i965simple/brw_context.h @@ -119,7 +119,6 @@ * Handles blending and (presumably) depth and stencil testing. */ -#define BRW_FALLBACK_TEXTURE 0x1 #define BRW_MAX_CURBE (32*16) struct brw_context; @@ -147,16 +146,13 @@ struct brw_winsys; /* Raised for other internal events: */ #define BRW_NEW_URB_FENCE 0x10000 -#define BRW_NEW_INPUT_DIMENSIONS 0x20000 +#define BRW_NEW_PSP 0x20000 #define BRW_NEW_CURBE_OFFSETS 0x40000 #define BRW_NEW_REDUCED_PRIMITIVE 0x80000 #define BRW_NEW_PRIMITIVE 0x100000 -#define BRW_NEW_CONTEXT 0x200000 -#define BRW_NEW_WM_INPUT_DIMENSIONS 0x400000 -#define BRW_NEW_INPUT_VARYING 0x800000 -#define BRW_NEW_PSP 0x1000000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 -#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) extern int BRW_DEBUG; #define DEBUG_TEXTURE 0x1 @@ -198,23 +194,47 @@ struct brw_state_flags { unsigned brw; }; + +struct brw_shader_info { + int nr_regs[8]; /* TGSI_FILE_* */ +}; + + + struct brw_vertex_program { struct pipe_shader_state program; - unsigned id; - unsigned param_state; /* flags indicating state tracked by params */ + struct brw_shader_info info; + int id; }; struct brw_fragment_program { struct pipe_shader_state program; - unsigned id; - unsigned param_state; /* flags indicating state tracked by params */ + struct brw_shader_info info; + + boolean UsesDepth; boolean UsesKill; boolean ComputesDepth; + int id; }; + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + struct brw_texture { struct pipe_texture base; @@ -248,6 +268,12 @@ struct brw_texture { * corresponding to a different brw_wm_prog_key struct, with different * compiled programs: */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + struct brw_wm_prog_data { unsigned curb_read_length; unsigned urb_read_length; @@ -256,13 +282,14 @@ struct brw_wm_prog_data { unsigned total_grf; unsigned total_scratch; - unsigned nr_params; - boolean error; - - /* Pointer to tracked values (only valid once - * _mesa_load_state_parameters has been called at runtime). + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. */ - const float *param[BRW_MAX_CURBE]; + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; }; struct brw_sf_prog_data { @@ -298,19 +325,14 @@ struct brw_vs_prog_data { unsigned inputs_read; + unsigned max_const; + /* Used for calculating urb partitions: */ unsigned urb_entry_size; }; -/* Size == 0 if output either not written, or always [0,0,0,1] - */ -struct brw_vs_ouput_sizes { - ubyte output_size[PIPE_MAX_SHADER_OUTPUTS]; -}; - - #define BRW_MAX_TEX_UNIT 8 #define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 @@ -374,8 +396,6 @@ struct brw_cache { struct brw_tracked_state { struct brw_state_flags dirty; void (*update)( struct brw_context *brw ); - void (*emit_reloc)( struct brw_context *brw ); - boolean always_update; }; @@ -455,8 +475,6 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - unsigned nr_atoms; } state; @@ -496,27 +514,16 @@ struct brw_context #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) - struct { - struct pipe_buffer_handle *vbo[BRW_NR_UPLOAD_BUFS]; - unsigned buf; - unsigned offset; - unsigned size; - unsigned wrap; - } upload; - /* Summary of size and varying of active arrays, so we can check * for changes to this state: */ struct brw_vertex_info info; - int last_vb; } vb; - unsigned *batch_start; unsigned hardware_dirty; unsigned dirty; unsigned pci_id; - /* BRW_NEW_URB_ALLOCATIONS: */ struct { @@ -557,11 +564,6 @@ struct brw_context unsigned vs_size; unsigned total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - unsigned gs_offset; float *last_buf; @@ -595,6 +597,8 @@ struct brw_context struct { struct brw_sf_prog_data *prog_data; + struct pipe_setup_linkage linkage; + unsigned prog_gs_offset; unsigned vp_gs_offset; unsigned state_gs_offset; @@ -602,11 +606,8 @@ struct brw_context struct { struct brw_wm_prog_data *prog_data; - struct brw_wm_compile *compile_data; - /* Input sizes, calculated from active vertex program: - */ - unsigned input_size_masks[4]; +// struct brw_wm_compiler *compile_data; /** @@ -667,8 +668,6 @@ void brw_destroy_state(struct brw_context *brw); * brw_tex.c */ void brwUpdateTextureState( struct brw_context *brw ); -void brw_FrameBufferTexInit( struct brw_context *brw ); -void brw_FrameBufferTexDestroy( struct brw_context *brw ); /* brw_urb.c diff --git a/src/mesa/pipe/i965simple/brw_curbe.c b/src/mesa/pipe/i965simple/brw_curbe.c index 0894e82d56..b943a7af98 100644 --- a/src/mesa/pipe/i965simple/brw_curbe.c +++ b/src/mesa/pipe/i965simple/brw_curbe.c @@ -35,6 +35,9 @@ #include "brw_defines.h" #include "brw_state.h" #include "brw_util.h" +#include "brw_wm.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" #define FILE_DEBUG_FLAG DEBUG_FALLBACKS @@ -43,11 +46,10 @@ static void calculate_curbe_offsets( struct brw_context *brw ) { /* CACHE_NEW_WM_PROG */ - unsigned nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16); /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->attribs.VertexProgram; - unsigned nr_vp_regs = (vp->program.num_inputs * 4 + 15) / 16; + unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16); unsigned nr_clip_regs = 0; unsigned total_regs; @@ -55,7 +57,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) /* BRW_NEW_CLIP ? */ if (brw->attribs.Transform->ClipPlanesEnabled) { unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); - nr_clip_regs = (nr_planes * 4 + 15) / 16; + nr_clip_regs = align(nr_planes * 4, 16); } #endif @@ -172,28 +174,18 @@ static float fixed_plane[6][4] = { { 1, 0, 0, 1 } }; -#if 0 /* Upload a new set of constants. Too much variability to go into the * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ static void upload_constant_buffer(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; unsigned sz = brw->curbe.total_size; unsigned bufsz = sz * 16 * sizeof(float); float *buf; unsigned i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->param_state; - brw->curbe.tracked_state.dirty.mesa |= fp->param_state; if (sz == 0) { struct brw_constant_buffer cb; @@ -220,10 +212,16 @@ static void upload_constant_buffer(struct brw_context *brw) if (brw->curbe.wm_size) { unsigned offset = brw->curbe.wm_start * 16; - _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* First the constant buffer constants: + */ + + /* Then any internally generated constants: + */ + for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) + buf[offset + i] = brw->wm.prog_data->internal_const[i]; - for (i = 0; i < brw->wm.prog_data->nr_params; i++) - buf[offset + i] = brw->wm.prog_data->param[i][0]; + assert(brw->wm.prog_data->max_const == + brw->wm.prog_data->nr_internal_consts); } @@ -243,34 +241,26 @@ static void upload_constant_buffer(struct brw_context *brw) buf[offset + i * 4 + 3] = fixed_plane[i][3]; } - /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to - * clip-space: + /* Clip planes: BRW_NEW_CLIP: */ - assert(MAX_CLIP_PLANES == 6); - for (j = 0; j < MAX_CLIP_PLANES; j++) { - if (brw->attribs.Transform->ClipPlanesEnabled & (1<attribs.Transform->_ClipUserPlane[j][0]; - buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1]; - buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2]; - buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3]; - i++; - } + for (j = 0; j < brw->attribs.Clip.nr; j++) { + buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; + buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; + buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; + buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; + i++; } } if (brw->curbe.vs_size) { - unsigned offset = brw->curbe.vs_start * 16; - unsigned nr = vp->program.Base.Parameters->NumParameters; +// unsigned offset = brw->curbe.vs_start * 16; +// unsigned nr = vp->max_const; - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* map the vertex constant buffer and copy to curbe: */ - for (i = 0; i < nr; i++) { - buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; - buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; - buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; - buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; - } +// assert(nr == 0); + assert(0); } if (0) { @@ -309,7 +299,12 @@ static void upload_constant_buffer(struct brw_context *brw) /* Copy data to the buffer: */ - dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf); + brw->winsys->buffer_subdata_typed(brw->winsys, + pool->buffer, + brw->curbe.gs_offset, + bufsz, + buf, + BRW_CONSTANT_BUFFER ); } /* TODO: only emit the constant_buffer packet when necessary, ie: @@ -341,9 +336,7 @@ static void upload_constant_buffer(struct brw_context *brw) * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ -/* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */ BRW_BATCH_STRUCT(brw, &cb); -/* intel_batchbuffer_align(brw->intel.batch, 64, 0); */ } } @@ -355,9 +348,8 @@ static void upload_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VERTEX_PROGRAM | + .brw = (BRW_NEW_CLIP | + BRW_NEW_CONSTANTS | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ BRW_NEW_CURBE_OFFSETS), @@ -366,4 +358,3 @@ const struct brw_tracked_state brw_constant_buffer = { .update = upload_constant_buffer }; -#endif diff --git a/src/mesa/pipe/i965simple/brw_eu.h b/src/mesa/pipe/i965simple/brw_eu.h index 111edb1506..23151ae9ed 100644 --- a/src/mesa/pipe/i965simple/brw_eu.h +++ b/src/mesa/pipe/i965simple/brw_eu.h @@ -694,6 +694,17 @@ void brw_init_compile( struct brw_compile *p ); const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ); + +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ); + /* Helpers for regular instructions: */ #define ALU1(OP) \ diff --git a/src/mesa/pipe/i965simple/brw_eu_emit.c b/src/mesa/pipe/i965simple/brw_eu_emit.c index bda63e8b9a..2423536dd1 100644 --- a/src/mesa/pipe/i965simple/brw_eu_emit.c +++ b/src/mesa/pipe/i965simple/brw_eu_emit.c @@ -363,10 +363,10 @@ static struct brw_instruction *next_insn( struct brw_compile *p, } -static struct brw_instruction *brw_alu1( struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src ) +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ) { struct brw_instruction *insn = next_insn(p, opcode); brw_set_dest(insn, dest); @@ -374,11 +374,11 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p, return insn; } -static struct brw_instruction *brw_alu2(struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1 ) +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) { struct brw_instruction *insn = next_insn(p, opcode); brw_set_dest(insn, dest); diff --git a/src/mesa/pipe/i965simple/brw_gs_state.c b/src/mesa/pipe/i965simple/brw_gs_state.c index 8e62eb4bd7..3932e9e939 100644 --- a/src/mesa/pipe/i965simple/brw_gs_state.c +++ b/src/mesa/pipe/i965simple/brw_gs_state.c @@ -34,6 +34,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "pipe/p_util.h" @@ -46,7 +47,7 @@ static void upload_gs_unit( struct brw_context *brw ) /* CACHE_NEW_GS_PROG */ if (brw->gs.prog_active) { gs.thread0.grf_reg_count = - ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1; + align(brw->gs.prog_data->total_grf, 16) / 16 - 1; gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; } diff --git a/src/mesa/pipe/i965simple/brw_misc_state.c b/src/mesa/pipe/i965simple/brw_misc_state.c index 2ba4d3a928..e600e9d8de 100644 --- a/src/mesa/pipe/i965simple/brw_misc_state.c +++ b/src/mesa/pipe/i965simple/brw_misc_state.c @@ -315,7 +315,7 @@ static void upload_pipe_control(struct brw_context *brw) const struct brw_tracked_state brw_pipe_control = { .dirty = { - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_SCENE, .cache = 0 }, .update = upload_pipe_control @@ -380,7 +380,7 @@ static void upload_invarient_state( struct brw_context *brw ) const struct brw_tracked_state brw_invarient_state = { .dirty = { - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_SCENE, .cache = 0 }, .update = upload_invarient_state @@ -416,7 +416,7 @@ static void upload_state_base_address( struct brw_context *brw ) const struct brw_tracked_state brw_state_base_address = { .dirty = { - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_SCENE, .cache = 0 }, .update = upload_state_base_address diff --git a/src/mesa/pipe/i965simple/brw_sf.c b/src/mesa/pipe/i965simple/brw_sf.c index f009ff37d9..e7c02beda5 100644 --- a/src/mesa/pipe/i965simple/brw_sf.c +++ b/src/mesa/pipe/i965simple/brw_sf.c @@ -36,9 +36,8 @@ #include "brw_util.h" #include "brw_sf.h" #include "brw_state.h" +#include "tgsi/util/tgsi_parse.h" -#if 0 -#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) @@ -46,7 +45,6 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_compile c; const unsigned *program; unsigned program_size; - unsigned i, idx; memset(&c, 0, sizeof(c)); @@ -55,27 +53,17 @@ static void compile_sf_prog( struct brw_context *brw, brw_init_compile(&c.func); c.key = *key; - c.nr_attrs = brw_count_bits(c.key.attrs); + + + c.nr_attrs = c.key.vp_output_count; c.nr_attr_regs = (c.nr_attrs+1)/2; - c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); + + c.nr_setup_attrs = c.key.fp_input_count; c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; - /* Construct map from attribute number to position in the vertex. - */ - for (i = idx = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { - c.point_attrs[i].CoordReplace = - brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0]; - } else - c.point_attrs[i].CoordReplace = FALSE; - idx++; - } /* Which primitive? Or all three? */ @@ -90,21 +78,17 @@ static void compile_sf_prog( struct brw_context *brw, break; case SF_POINTS: c.nr_verts = 1; - if (key->do_point_sprite) - brw_emit_point_sprite_setup( &c ); - else - brw_emit_point_setup( &c ); + brw_emit_point_setup( &c ); break; + case SF_UNFILLED_TRIS: - c.nr_verts = 3; - brw_emit_anyprim_setup( &c ); - break; default: assert(0); return; } + /* get the program */ program = brw_get_program(&c.func, &program_size); @@ -142,20 +126,15 @@ static void upload_sf_prog( struct brw_context *brw ) /* Populate the key, noting state dependencies: */ /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->outputs_written; + key.vp_output_count = brw->vs.prog_data->outputs_written; /* BRW_NEW_REDUCED_PRIMITIVE */ switch (brw->reduced_primitive) { case PIPE_PRIM_TRIANGLES: - /* NOTE: We just use the edgeflag attribute as an indicator that - * unfilled triangles are active. We don't actually do the - * edgeflag testing here, it is already done in the clip - * program. - */ - if (key.attrs & (1<attribs.Point->PointSprite; - key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; - /* BRW_NEW_RASTER */ - key.do_flat_shading = (brw->attribs.Raster->flatshade); - key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); - /* _NEW_POLYGON */ - if (key.do_twoside_color) - key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); +// key.do_point_sprite = brw->attribs.Point->PointSprite; +// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; + +// key.do_flat_shading = (brw->attribs.Raster->flatshade); +// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); + +// if (key.do_twoside_color) +// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); if (!search_cache(brw, &key)) @@ -184,11 +162,150 @@ static void upload_sf_prog( struct brw_context *brw ) const struct brw_tracked_state brw_sf_prog = { .dirty = { - .brw = (BRW_NEW_RASTER | - BRW_NEW_REDUCED_PRIMITIVE), - .cache = CACHE_NEW_VS_PROG + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_REDUCED_PRIMITIVE | + BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, }, .update = upload_sf_prog }; -#endif + +/* Build a struct like the one we'd like the state tracker to pass to + * us. + */ +static void update_sf_linkage( struct brw_context *brw ) +{ + const struct brw_vertex_program *vs = brw->attribs.VertexProgram; + const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; + struct pipe_setup_linkage state; + struct tgsi_parse_context parse; + + int i, j; + int nr_vp_outputs = 0; + int done = 0; + + struct { + unsigned semantic:8; + unsigned semantic_index:16; + } fp_semantic[32], vp_semantic[32]; + + memset(&state, 0, sizeof(state)); + + state.fp_input_count = 0; + + + + + /* First scan fp inputs + */ + tgsi_parse_init( &parse, fs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + + for (i = first; i < last; i++) { + state.fp_input[i].vp_output = ~0; + state.fp_input[i].bf_vp_output = ~0; + state.fp_input[i].interp_mode = + parse.FullToken.FullDeclaration.Interpolation.Interpolate; + + fp_semantic[i].semantic = + parse.FullToken.FullDeclaration.Semantic.SemanticName; + fp_semantic[i].semantic_index = + parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + + } + + assert(last > state.fp_input_count); + state.fp_input_count = last; + } + break; + default: + done = 1; + break; + } + } + + + assert(state.fp_input_count == fs->program.num_inputs); + + + /* Then scan vp outputs + */ + done = 0; + tgsi_parse_init( &parse, vs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + + for (i = first; i < last; i++) { + vp_semantic[i].semantic = + parse.FullToken.FullDeclaration.Semantic.SemanticName; + vp_semantic[i].semantic_index = + parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + } + + assert(last > nr_vp_outputs); + nr_vp_outputs = last; + } + break; + default: + done = 1; + break; + } + } + + + /* Now match based on semantic information. + */ + for (i = 0; i< state.fp_input_count; i++) { + for (j = 0; j < nr_vp_outputs; j++) { + if (fp_semantic[i].semantic == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].vp_output = j; + } + } + if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) { + for (j = 0; j < nr_vp_outputs; j++) { + if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].bf_vp_output = j; + } + } + } + } + + if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) { + brw->sf.linkage = state; + brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE; + } +} + + +const struct brw_tracked_state brw_sf_linkage = { + .dirty = { + .brw = (BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, + }, + .update = update_sf_linkage +}; + diff --git a/src/mesa/pipe/i965simple/brw_sf.h b/src/mesa/pipe/i965simple/brw_sf.h index d04388325d..b7ada47560 100644 --- a/src/mesa/pipe/i965simple/brw_sf.h +++ b/src/mesa/pipe/i965simple/brw_sf.h @@ -42,15 +42,26 @@ #define SF_TRIANGLES 2 #define SF_UNFILLED_TRIS 3 + + struct brw_sf_prog_key { - unsigned attrs:32; + unsigned vp_output_count:5; + unsigned fp_input_count:5; + unsigned primitive:2; unsigned do_twoside_color:1; unsigned do_flat_shading:1; unsigned frontface_ccw:1; unsigned do_point_sprite:1; - unsigned pad:10; - int SpriteOrigin; + + /* Interpolation masks; + */ + unsigned linear_mask; + unsigned persp_mask; + unsigned const_mask; + + +// int SpriteOrigin; }; struct brw_sf_point_tex { diff --git a/src/mesa/pipe/i965simple/brw_sf_emit.c b/src/mesa/pipe/i965simple/brw_sf_emit.c index 93f23171f2..834b5efdfe 100644 --- a/src/mesa/pipe/i965simple/brw_sf_emit.c +++ b/src/mesa/pipe/i965simple/brw_sf_emit.c @@ -36,171 +36,6 @@ #include "brw_util.h" #include "brw_sf.h" -#if 0 -static struct brw_reg get_vert_attr(struct brw_sf_compile *c, - struct brw_reg vert, - unsigned attr) -{ - unsigned off = c->attr_to_idx[attr] / 2; - unsigned sub = c->attr_to_idx[attr] % 2; - - return brw_vec4_grf(vert.nr + off, sub * 4); -} - -static boolean have_attr(struct brw_sf_compile *c, - unsigned attr) -{ - return (c->key.attrs & (1<func; - unsigned i; - - for (i = 0; i < 2; i++) { - if (have_attr(c, VERT_RESULT_COL0+i) && - have_attr(c, VERT_RESULT_BFC0+i)) - brw_MOV(p, - get_vert_attr(c, vert, VERT_RESULT_COL0+i), - get_vert_attr(c, vert, VERT_RESULT_BFC0+i)); - } -} - - -static void do_twoside_color( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *if_insn; - unsigned backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L; - - /* Already done in clip program: - */ - if (c->key.primitive == SF_UNFILLED_TRIS) - return; - - /* XXX: What happens if BFC isn't present? This could only happen - * for user-supplied vertex programs, as t_vp_build.c always does - * the right thing. - */ - if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) && - !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1))) - return; - - /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order - * to get all channels active inside the IF. In the clipping code - * we run with NoMask, so it's not an option and we can use - * BRW_EXECUTE_1 for all comparisions. - */ - brw_push_insn_state(p); - brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0)); - if_insn = brw_IF(p, BRW_EXECUTE_4); - { - switch (c->nr_verts) { - case 3: copy_bfc(c, c->vert[2]); - case 2: copy_bfc(c, c->vert[1]); - case 1: copy_bfc(c, c->vert[0]); - } - } - brw_ENDIF(p, if_insn); - brw_pop_insn_state(p); -} - - - -/*********************************************************************** - * Flat shading - */ - -#define VERT_RESULT_COLOR_BITS ((1<func; - unsigned i; - - for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) { - if (have_attr(c,i)) - brw_MOV(p, - get_vert_attr(c, dst, i), - get_vert_attr(c, src, i)); - } -} - - - -/* Need to use a computed jump to copy flatshaded attributes as the - * vertices are ordered according to y-coordinate before reaching this - * point, so the PV could be anywhere. - */ -static void do_flatshade_triangle( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg ip = brw_ip_reg(); - unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); - if (!nr) - return; - - /* Already done in clip program: - */ - if (c->key.primitive == SF_UNFILLED_TRIS) - return; - - brw_push_insn_state(p); - - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1)); - brw_JMPI(p, ip, ip, c->pv); - - copy_colors(c, c->vert[1], c->vert[0]); - copy_colors(c, c->vert[2], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1)); - - copy_colors(c, c->vert[0], c->vert[1]); - copy_colors(c, c->vert[2], c->vert[1]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*2)); - - copy_colors(c, c->vert[0], c->vert[2]); - copy_colors(c, c->vert[1], c->vert[2]); - - brw_pop_insn_state(p); -} - - -static void do_flatshade_line( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg ip = brw_ip_reg(); - unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); - - if (!nr) - return; - - /* Already done in clip program: - */ - if (c->key.primitive == SF_UNFILLED_TRIS) - return; - - brw_push_insn_state(p); - - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1)); - brw_JMPI(p, ip, ip, c->pv); - copy_colors(c, c->vert[1], c->vert[0]); - - brw_JMPI(p, ip, ip, brw_imm_ud(nr)); - copy_colors(c, c->vert[0], c->vert[1]); - - brw_pop_insn_state(p); -} - /*********************************************************************** @@ -277,9 +112,6 @@ static void copy_z_inv_w( struct brw_sf_compile *c ) static void invert_det( struct brw_sf_compile *c) { - /* Looks like we invert all 8 elements just to get 1/det in - * position 2 !?! - */ brw_math(&c->func, c->inv_det, BRW_MATH_FUNCTION_INV, @@ -302,22 +134,16 @@ static boolean calculate_masks( struct brw_sf_compile *c, ushort *pc_linear) { boolean is_last_attr = (reg == c->nr_setup_regs - 1); - unsigned persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS; - unsigned linear_mask; - if (c->key.do_flat_shading) - linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); - else - linear_mask = c->key.attrs; *pc_persp = 0; *pc_linear = 0; *pc = 0xf; - if (persp_mask & (1 << c->idx_to_attr[reg*2])) - *pc_persp = 0xf; +// if (persp_mask & (1 << c->idx_to_attr[reg*2])) +// *pc_persp = 0xf; - if (linear_mask & (1 << c->idx_to_attr[reg*2])) +// if (linear_mask & (1 << c->idx_to_attr[reg*2])) *pc_linear = 0xf; /* Maybe only processs one attribute on the final round: @@ -325,10 +151,10 @@ static boolean calculate_masks( struct brw_sf_compile *c, if (reg*2+1 < c->nr_setup_attrs) { *pc |= 0xf0; - if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) - *pc_persp |= 0xf0; +// if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) +// *pc_persp |= 0xf0; - if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) +// if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) *pc_linear |= 0xf0; } @@ -347,12 +173,6 @@ void brw_emit_tri_setup( struct brw_sf_compile *c ) invert_det(c); copy_z_inv_w(c); - if (c->key.do_twoside_color) - do_twoside_color(c); - - if (c->key.do_flat_shading) - do_flatshade_triangle(c); - for (i = 0; i < c->nr_setup_regs; i++) { @@ -433,9 +253,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c ) invert_det(c); copy_z_inv_w(c); - if (c->key.do_flat_shading) - do_flatshade_line(c); - for (i = 0; i < c->nr_setup_regs; i++) { /* Pair of incoming attributes: @@ -491,86 +308,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c ) } } -void brw_emit_point_sprite_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - c->nr_verts = 1; - alloc_regs(c); - copy_z_inv_w(c); - for (i = 0; i < c->nr_setup_regs; i++) - { - struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; - struct brw_reg a0 = offset(c->vert[0], i); - ushort pc, pc_persp, pc_linear; - boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - if (!tex->CoordReplace) { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } - } - - if (tex->CoordReplace) { - /* Caculate 1.0/PointWidth */ - brw_math(&c->func, - c->tmp, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 0, - c->dx0, - BRW_MATH_DATA_SCALAR, - BRW_MATH_PRECISION_FULL); - - if (c->key.SpriteOrigin == GL_UPPER_LEFT) { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } else { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } - } else { - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); - } - - { - brw_set_predicate_control_flag_value(p, pc); - if (tex->CoordReplace) { - if (c->key.SpriteOrigin == GL_UPPER_LEFT) { - brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); - brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); - } - else - brw_MOV(p, c->m3C0, brw_imm_f(0.0)); - } else { - brw_MOV(p, c->m3C0, a0); /* constant value */ - } - - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); - } - } -} /* Points setup - several simplifications as all attributes are * constant across the face of the point (point sprites excluded!) @@ -629,68 +366,3 @@ void brw_emit_point_setup( struct brw_sf_compile *c ) } } } - -void brw_emit_anyprim_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg ip = brw_ip_reg(); - struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); - struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); - struct brw_reg primmask; - struct brw_instruction *jmp; - struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); - - alloc_regs(c); - - primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); - - brw_MOV(p, primmask, brw_imm_ud(1)); - brw_SHL(p, primmask, primmask, payload_prim); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); - brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | - (1<<_3DPRIM_TRISTRIP) | - (1<<_3DPRIM_TRIFAN) | - (1<<_3DPRIM_TRISTRIP_REVERSE) | - (1<<_3DPRIM_POLYGON) | - (1<<_3DPRIM_RECTLIST) | - (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); - { - brw_push_insn_state(p); - brw_emit_tri_setup( c ); - brw_pop_insn_state(p); - /* note - thread killed in subroutine */ - } - brw_land_fwd_jump(p, jmp); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); - brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | - (1<<_3DPRIM_LINESTRIP) | - (1<<_3DPRIM_LINELOOP) | - (1<<_3DPRIM_LINESTRIP_CONT) | - (1<<_3DPRIM_LINESTRIP_BF) | - (1<<_3DPRIM_LINESTRIP_CONT_BF))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); - { - brw_push_insn_state(p); - brw_emit_line_setup( c ); - brw_pop_insn_state(p); - /* note - thread killed in subroutine */ - } - brw_land_fwd_jump(p, jmp); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); - brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<intel.ctx; struct brw_sf_viewport sfv; - struct intel_renderbuffer *irb = - intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0][0]); - float y_scale, y_bias; - int x, y, w, h, x1, x2, y1, y2; - int draw_h = ctx->DrawBuffer->Height; memset(&sfv, 0, sizeof(sfv)); - if (ctx->DrawBuffer->Name) { - /* User-created FBO */ - if (irb && !irb->RenderToTexture) { - y_scale = -1.0; - y_bias = draw_h; - } else { - y_scale = 1.0; - y_bias = 0; - } - } else { - if (brw->intel.driDrawable != NULL) { - y_scale = -1.0; - y_bias = draw_h; - } else { - y_scale = 1.0; - y_bias = 0; - } - } - /* _NEW_VIEWPORT, BRW_NEW_METAOPS */ - - if (!brw->metaops.active) { - const float *v = brw->intel.ctx.Viewport._WindowMap.m; - - sfv.viewport.m00 = v[MAT_SX]; - sfv.viewport.m11 = v[MAT_SY] * y_scale; - sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale; - sfv.viewport.m30 = v[MAT_TX]; - sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; - sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale; - } else { - sfv.viewport.m00 = 1; - sfv.viewport.m11 = - 1; - sfv.viewport.m22 = 1; - sfv.viewport.m30 = 0; - sfv.viewport.m31 = brw->intel.driDrawable->h; - sfv.viewport.m32 = 0; - } + /* BRW_NEW_VIEWPORT */ + { + const float *scale = brw->attribs.Viewport.scale; + const float *trans = brw->attribs.Viewport.translate; - /* _NEW_SCISSOR */ - x = brw->attribs.Scissor->X; - y = brw->attribs.Scissor->Y; - w = brw->attribs.Scissor->Width; - h = brw->attribs.Scissor->Height; - - if (ctx->DrawBuffer->Name == 0) { - x1 = x; - y1 = draw_h - (y + h); - x2 = x + w - 1; - y2 = y1 + h - 1; - } else { - /* FBO has non-inverted coords. */ - x1 = x; - y1 = y; - x2 = x + w - 1; - y2 = y + h - 1; + sfv.viewport.m00 = scale[0]; + sfv.viewport.m11 = scale[1]; + sfv.viewport.m22 = scale[2]; + sfv.viewport.m30 = trans[0]; + sfv.viewport.m31 = trans[1]; + sfv.viewport.m32 = trans[2]; } - sfv.scissor.xmin = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1); - sfv.scissor.xmax = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1); - sfv.scissor.ymin = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1); - sfv.scissor.ymax = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1); + /* _NEW_SCISSOR */ + sfv.scissor.xmin = brw->attribs.Scissor.minx; + sfv.scissor.xmax = brw->attribs.Scissor.maxx; + sfv.scissor.ymin = brw->attribs.Scissor.miny; + sfv.scissor.ymax = brw->attribs.Scissor.maxy; brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); } const struct brw_tracked_state brw_sf_vp = { .dirty = { - .mesa = (_NEW_VIEWPORT | - _NEW_SCISSOR), - .brw = BRW_NEW_METAOPS, + .brw = (BRW_NEW_SCISSOR | + BRW_NEW_VIEWPORT), .cache = 0 }, .update = upload_sf_vp @@ -130,7 +80,7 @@ static void upload_sf_unit( struct brw_context *brw ) memset(&sf, 0, sizeof(sf)); /* CACHE_NEW_SF_PROG */ - sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1; + sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1; sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; @@ -151,19 +101,19 @@ static void upload_sf_unit( struct brw_context *brw ) /* CACHE_NEW_SF_VP */ sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; - sf.sf5.viewport_transform = 1; - /* _NEW_SCISSOR */ - if (brw->attribs.Scissor->Enabled) + /* BRW_NEW_RASTER */ + if (brw->attribs.Raster->scissor) sf.sf6.scissor = 1; - /* _NEW_POLYGON */ +#if 0 if (brw->attribs.Polygon->FrontFace == GL_CCW) sf.sf5.front_winding = BRW_FRONTWINDING_CCW; else sf.sf5.front_winding = BRW_FRONTWINDING_CW; + if (brw->attribs.Polygon->CullFlag) { switch (brw->attribs.Polygon->CullFaceMode) { case GL_FRONT: @@ -182,25 +132,24 @@ static void upload_sf_unit( struct brw_context *brw ) } else sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#endif - - /* _NEW_LINE */ - /* XXX use ctx->Const.Min/MaxLineWidth here */ - sf.sf6.line_width = CLAMP(brw->attribs.Line->Width, 1.0, 5.0) * (1<<1); + sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1); sf.sf6.line_endcap_aa_region_width = 1; - if (brw->attribs.Line->SmoothFlag) + if (brw->attribs.Raster->line_smooth) sf.sf6.aa_enable = 1; else if (sf.sf6.line_width <= 0x2) sf.sf6.line_width = 0; - /* _NEW_POINT */ sf.sf6.point_rast_rule = 1; /* opengl conventions */ - /* XXX clamp max depends on AA vs. non-AA */ - sf.sf7.sprite_point = brw->attribs.Point->PointSprite; - sf.sf7.point_size = CLAMP(brw->attribs.Point->Size, 1.0, 255.0) * (1<<3); - sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated; + sf.sf7.sprite_point = brw->attribs.Raster->point_sprite; + sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3); + sf.sf7.use_point_size_state = brw->attribs.Raster->point_size_per_vertex; /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ @@ -220,12 +169,8 @@ static void upload_sf_unit( struct brw_context *brw ) const struct brw_tracked_state brw_sf_unit = { .dirty = { - .mesa = (_NEW_POLYGON | - _NEW_LINE | - _NEW_POINT | - _NEW_SCISSOR), - .brw = (BRW_NEW_URB_FENCE | - BRW_NEW_METAOPS), + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_URB_FENCE), .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) }, @@ -233,4 +178,3 @@ const struct brw_tracked_state brw_sf_unit = { }; -#endif diff --git a/src/mesa/pipe/i965simple/brw_shader_info.c b/src/mesa/pipe/i965simple/brw_shader_info.c new file mode 100644 index 0000000000..431b45466a --- /dev/null +++ b/src/mesa/pipe/i965simple/brw_shader_info.c @@ -0,0 +1,49 @@ + +#include "brw_context.h" +#include "brw_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + + + + +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ) +{ + struct tgsi_parse_context parse; + int done = 0; + + tgsi_parse_init( &parse, tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned last = decl->u.DeclarationRange.Last; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + // Broken by crazy wpos init: + //assert( info->nr_regs[decl->Declaration.File] <= last); + + info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File], + last+1); + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + +} diff --git a/src/mesa/pipe/i965simple/brw_state.c b/src/mesa/pipe/i965simple/brw_state.c index f1eba146c4..032a4e0285 100644 --- a/src/mesa/pipe/i965simple/brw_state.c +++ b/src/mesa/pipe/i965simple/brw_state.c @@ -198,6 +198,10 @@ static void * brw_create_fs_state(struct pipe_context *pipe, /* XXX: Do I have to duplicate the tokens as well?? */ brw_fp->program = *shader; + brw_fp->id = brw_context(pipe)->program_id++; + + brw_shader_info(shader->tokens, + &brw_fp->info); return (void *)brw_fp; } @@ -228,6 +232,9 @@ static void *brw_create_vs_state(struct pipe_context *pipe, /* XXX: Do I have to duplicate the tokens as well?? */ brw_vp->program = *shader; + brw_vp->id = brw_context(pipe)->program_id++; + brw_shader_info(shader->tokens, + &brw_vp->info); tgsi_dump(shader->tokens, 0); diff --git a/src/mesa/pipe/i965simple/brw_state.h b/src/mesa/pipe/i965simple/brw_state.h index 4dabfe8082..d09711f6f0 100644 --- a/src/mesa/pipe/i965simple/brw_state.h +++ b/src/mesa/pipe/i965simple/brw_state.h @@ -154,4 +154,11 @@ void brw_upload_clip_prog(struct brw_context *brw); void brw_upload_blend_constant_color(struct brw_context *brw); void brw_upload_wm_samplers(struct brw_context *brw); +/* brw_shader_info.c + */ + +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ); + + #endif diff --git a/src/mesa/pipe/i965simple/brw_state_cache.c b/src/mesa/pipe/i965simple/brw_state_cache.c index 13e262d2e5..c5738733f4 100644 --- a/src/mesa/pipe/i965simple/brw_state_cache.c +++ b/src/mesa/pipe/i965simple/brw_state_cache.c @@ -178,8 +178,9 @@ unsigned brw_upload_cache( struct brw_cache *cache, if (BRW_DEBUG & DEBUG_STATE) printf("upload %s: %d bytes to pool buffer %p offset %x\n", - cache->name, data_size, - cache->pool->buffer, + cache->name, + data_size, + (void*)cache->pool->buffer, offset); /* Copy data to the buffer: diff --git a/src/mesa/pipe/i965simple/brw_state_pool.c b/src/mesa/pipe/i965simple/brw_state_pool.c index a490049024..78268ed8f2 100644 --- a/src/mesa/pipe/i965simple/brw_state_pool.c +++ b/src/mesa/pipe/i965simple/brw_state_pool.c @@ -43,17 +43,18 @@ */ #include "pipe/p_winsys.h" +#include "pipe/p_util.h" #include "brw_context.h" #include "brw_state.h" boolean brw_pool_alloc( struct brw_mem_pool *pool, unsigned size, - unsigned align, + unsigned alignment, unsigned *offset_return) { - unsigned fixup = ALIGN(pool->offset, align) - pool->offset; + unsigned fixup = align(pool->offset, alignment) - pool->offset; - size = ALIGN(size, 4); + size = align(size, 4); if (pool->offset + fixup + size >= pool->size) { printf("%s failed\n", __FUNCTION__); @@ -114,7 +115,7 @@ void brw_pool_check_wrap( struct brw_context *brw, struct brw_mem_pool *pool ) { if (pool->offset > (pool->size * 3) / 4) { - brw->state.dirty.brw |= BRW_NEW_CONTEXT; + brw->state.dirty.brw |= BRW_NEW_SCENE; } } diff --git a/src/mesa/pipe/i965simple/brw_state_upload.c b/src/mesa/pipe/i965simple/brw_state_upload.c index 10f1d4812a..1fb480172d 100644 --- a/src/mesa/pipe/i965simple/brw_state_upload.c +++ b/src/mesa/pipe/i965simple/brw_state_upload.c @@ -97,8 +97,6 @@ const struct brw_tracked_state *atoms[] = void brw_init_state( struct brw_context *brw ) { - unsigned i; - brw_init_pools(brw); brw_init_caches(brw); @@ -156,7 +154,7 @@ void brw_validate_state( struct brw_context *brw ) state->brw == 0) return; - if (brw->state.dirty.brw & BRW_NEW_CONTEXT) + if (brw->state.dirty.brw & BRW_NEW_SCENE) brw_clear_batch_cache_flush(brw); if (BRW_DEBUG) { diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.c b/src/mesa/pipe/i965simple/brw_tex_layout.c index b9514be0c2..7d6e2851b1 100644 --- a/src/mesa/pipe/i965simple/brw_tex_layout.c +++ b/src/mesa/pipe/i965simple/brw_tex_layout.c @@ -149,10 +149,10 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) unsigned mip1_width; if (pt->compressed) { - mip1_width = ALIGN(minify(pt->width[0]), align_w) - + ALIGN(minify(minify(pt->width[0])), align_w); + mip1_width = align(minify(pt->width[0]), align_w) + + align(minify(minify(pt->width[0])), align_w); } else { - mip1_width = ALIGN(minify(pt->width[0]), align_w) + mip1_width = align(minify(pt->width[0]), align_w) + minify(minify(pt->width[0])); } @@ -164,7 +164,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) /* Pitch must be a whole number of dwords, even though we * express it in texels. */ - tex->pitch = ALIGN(tex->pitch * pt->cpp, 4) / pt->cpp; + tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp; tex->total_height = 0; for ( level = pt->first_level ; level <= pt->last_level ; level++ ) { @@ -176,7 +176,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) if (pt->compressed) img_height = MAX2(1, height/4); else - img_height = ALIGN(height, align_h); + img_height = align(height, align_h); /* Because the images are packed better, the final offset @@ -187,7 +187,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) /* Layout_below: step right after second mipmap. */ if (level == pt->first_level + 1) { - x += ALIGN(width, align_w); + x += align(width, align_w); } else { y += img_height; @@ -221,13 +221,13 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture #if 0 if (pt->compressed) { align_w = intel_compressed_alignment(pt->internal_format); - pt->pitch = ALIGN(width, align_w); + pt->pitch = align(width, align_w); pack_y_pitch = (height + 3) / 4; } else #endif { - tex->pitch = ALIGN(pt->width[0] * pt->cpp, 4) / pt->cpp; - pack_y_pitch = ALIGN(pt->height[0], align_h); + tex->pitch = align(pt->width[0] * pt->cpp, 4) / pt->cpp; + pack_y_pitch = align(pt->height[0], align_h); } pack_x_pitch = tex->pitch; @@ -262,8 +262,8 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture if (pt->compressed) { pack_y_pitch = (height + 3) / 4; - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); + if (pack_x_pitch > align(width, align_w)) { + pack_x_pitch = align(width, align_w); pack_x_nr <<= 1; } } else { @@ -275,7 +275,7 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture if (pack_y_pitch > 2) { pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); + pack_y_pitch = align(pack_y_pitch, align_h); } } @@ -305,8 +305,6 @@ brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt) sizeof(struct brw_texture)); if (tex) { - struct brw_context *brw = brw_context(pipe); - memset(&tex->base + 1, 0, sizeof(struct brw_texture) - sizeof(struct pipe_texture)); diff --git a/src/mesa/pipe/i965simple/brw_vs_state.c b/src/mesa/pipe/i965simple/brw_vs_state.c index 7d6fb383b9..c73469929c 100644 --- a/src/mesa/pipe/i965simple/brw_vs_state.c +++ b/src/mesa/pipe/i965simple/brw_vs_state.c @@ -44,7 +44,7 @@ static void upload_vs_unit( struct brw_context *brw ) /* CACHE_NEW_VS_PROG */ vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; - vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; vs.thread3.dispatch_grf_start_reg = 1; diff --git a/src/mesa/pipe/i965simple/brw_wm.c b/src/mesa/pipe/i965simple/brw_wm.c index f4e3166e1c..f0a38d384b 100644 --- a/src/mesa/pipe/i965simple/brw_wm.c +++ b/src/mesa/pipe/i965simple/brw_wm.c @@ -33,7 +33,9 @@ #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" +#include "brw_eu.h" #include "brw_state.h" +#include "pipe/p_util.h" @@ -41,24 +43,22 @@ static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) { - struct brw_wm_compile *c; + struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile); const unsigned *program; unsigned program_size; - c = brw->wm.compile_data; - if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; - } else { - memset(c, 0, sizeof(*brw->wm.compile_data)); - } - memcpy(&c->key, key, sizeof(*key)); - + c->key = *key; c->fp = fp; + + c->delta_xy[0] = brw_null_reg(); + c->delta_xy[1] = brw_null_reg(); + c->pixel_xy[0] = brw_null_reg(); + c->pixel_xy[1] = brw_null_reg(); + c->pixel_w = brw_null_reg(); + fprintf(stderr, "XXXXXXXX FP\n"); - brw_wm_glsl_emit(c); /* get the program @@ -74,6 +74,8 @@ static void do_wm_prog( struct brw_context *brw, program_size, &c->prog_data, &brw->wm.prog_data ); + + FREE(c); } @@ -86,8 +88,7 @@ static void brw_wm_populate_key( struct brw_context *brw, (struct brw_fragment_program *)brw->attribs.FragmentProgram; unsigned lookup = 0; unsigned line_aa; - unsigned i; - + memset(key, 0, sizeof(*key)); /* Build the index for table lookup @@ -204,7 +205,6 @@ static void brw_upload_wm_prog( struct brw_context *brw ) const struct brw_tracked_state brw_wm_prog = { .dirty = { .brw = (BRW_NEW_FS | - BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), .cache = 0 }, diff --git a/src/mesa/pipe/i965simple/brw_wm.h b/src/mesa/pipe/i965simple/brw_wm.h index a394e25da3..a1ac0f504a 100644 --- a/src/mesa/pipe/i965simple/brw_wm.h +++ b/src/mesa/pipe/i965simple/brw_wm.h @@ -60,86 +60,19 @@ struct brw_wm_prog_key { unsigned aa_dest_stencil_reg:3; unsigned dest_depth_reg:3; unsigned nr_depth_regs:3; - unsigned projtex_mask:8; unsigned shadowtex_mask:8; unsigned computes_depth:1; /* could be derived from program string */ unsigned source_depth_to_render_target:1; - unsigned flat_shade:1; unsigned runtime_check_aads_emit:1; - - unsigned yuvtex_mask:8; - unsigned pad1:24; - - unsigned program_string_id:32; -}; - - -/* A bit of a glossary: - * - * brw_wm_value: A computed value or program input. Values are - * constant, they are created once and are never modified. When a - * fragment program register is written or overwritten, new values are - * created fresh, preserving the rule that values are constant. - * - * brw_wm_ref: A reference to a value. Wherever a value used is by an - * instruction or as a program output, that is tracked with an - * instance of this struct. All references to a value occur after it - * is created. After the last reference, a value is dead and can be - * discarded. - * - * brw_wm_grf: Represents a physical hardware register. May be either - * empty or hold a value. Register allocation is the process of - * assigning values to grf registers. This occurs in pass2 and the - * brw_wm_grf struct is not used before that. - * - * Fragment program registers: These are time-varying constructs that - * are hard to reason about and which we translate away in pass0. A - * single fragment program register element (eg. temp[0].x) will be - * translated to one or more brw_wm_value structs, one for each time - * that temp[0].x is written to during the program. - */ - - -/* Used in pass2 to track register allocation. - */ -struct brw_wm_grf { - struct brw_wm_value *value; - unsigned nextuse; -}; + unsigned yuvtex_mask:8; -struct brw_wm_value { - struct brw_reg hw_reg; /* emitted to this reg, may not always be there */ - struct brw_wm_ref *lastuse; - struct brw_wm_grf *resident; - unsigned contributes_to_output:1; - unsigned spill_slot:16; /* if non-zero, spill immediately after calculation */ + unsigned program_string_id; }; -struct brw_wm_ref { - struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */ - struct brw_wm_value *value; - struct brw_wm_ref *prevuse; - unsigned unspill_reg:7; /* unspill to reg */ - unsigned emitted:1; - unsigned insn:24; -}; -struct brw_wm_constref { - const struct brw_wm_ref *ref; - float constval; -}; -struct brw_wm_instruction { - struct brw_wm_value *dst[4]; - struct brw_wm_ref *src[3][4]; - unsigned opcode:8; - unsigned saturate:1; - unsigned writemask:4; - unsigned tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ - unsigned tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ -}; #define PROGRAM_INTERNAL_PARAM #define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ @@ -151,124 +84,59 @@ struct brw_wm_instruction { #define BRW_WM_MAX_CONST 256 #define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS - - -/* New opcodes to track internal operations required for WM unit. - * These are added early so that the registers used can be tracked, - * freed and reused like those of other instructions. - */ -#define WM_PIXELXY (TGSI_OPCODE_LAST) -#define WM_DELTAXY (TGSI_OPCODE_LAST + 1) -#define WM_PIXELW (TGSI_OPCODE_LAST + 2) -#define WM_LINTERP (TGSI_OPCODE_LAST + 3) -#define WM_PINTERP (TGSI_OPCODE_LAST + 4) -#define WM_CINTERP (TGSI_OPCODE_LAST + 5) -#define WM_WPOSXY (TGSI_OPCODE_LAST + 6) -#define WM_FB_WRITE (TGSI_OPCODE_LAST + 7) -#define MAX_WM_OPCODE (TGSI_OPCODE_LAST + 8) - #define PAYLOAD_DEPTH (PIPE_ATTRIB_MAX) +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + struct brw_wm_compile { struct brw_compile func; struct brw_wm_prog_key key; - struct brw_wm_prog_data prog_data; + struct brw_wm_prog_data prog_data; /* result */ struct brw_fragment_program *fp; - float (*env_param)[4]; - - enum { - START, - PASS2_DONE - } state; - - /* Initial pass - translate fp instructions to fp instructions, - * simplifying and adding instructions for interpolation and - * framebuffer writes. - */ - const struct pipe_shader_state *prog_instructions; - unsigned nr_fp_insns; - unsigned fp_temp; - unsigned fp_interp_emitted; - unsigned fp_deriv_emitted; - - struct tgsi_src_register pixel_xy; - struct tgsi_src_register delta_xy; - struct tgsi_src_register pixel_w; - - - struct brw_wm_value vreg[BRW_WM_MAX_VREG]; - unsigned nr_vreg; - - struct brw_wm_value creg[BRW_WM_MAX_PARAM]; - unsigned nr_creg; + unsigned grf_limit; + unsigned max_wm_grf; - struct { - struct brw_wm_value depth[4]; /* includes r0/r1 */ - struct brw_wm_value input_interp[PIPE_ATTRIB_MAX]; - } payload; + struct brw_reg pixel_xy[2]; + struct brw_reg delta_xy[2]; + struct brw_reg pixel_w; - const struct brw_wm_ref *pass0_fp_reg[16][256][4]; - struct brw_wm_ref undef_ref; - struct brw_wm_value undef_value; + struct brw_reg wm_regs[8][32][4]; - struct brw_wm_ref refs[BRW_WM_MAX_REF]; - unsigned nr_refs; + struct brw_reg payload_depth[4]; + struct brw_reg payload_coef[16]; - struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; - unsigned nr_insns; + struct brw_reg emit_mask_reg; - struct brw_wm_constref constref[BRW_WM_MAX_CONST]; - unsigned nr_constrefs; + struct brw_instruction *if_inst[MAX_IFSN]; + int if_insn; - struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; + struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + int loop_insn; - unsigned grf_limit; - unsigned max_wm_grf; - unsigned last_scratch; + struct brw_instruction *inst0; + struct brw_instruction *inst1; - struct { - boolean inited; - struct brw_reg reg; - } wm_regs[16][256][4]; struct brw_reg stack; - struct brw_reg emit_mask_reg; + struct brw_indirect stack_index; + unsigned reg_index; + + unsigned tmp_start; unsigned tmp_index; }; -unsigned brw_wm_nr_args( unsigned opcode ); -unsigned brw_wm_is_scalar_result( unsigned opcode ); - -void brw_wm_pass_fp( struct brw_wm_compile *c ); -void brw_wm_pass0( struct brw_wm_compile *c ); -void brw_wm_pass1( struct brw_wm_compile *c ); -void brw_wm_pass2( struct brw_wm_compile *c ); -void brw_wm_emit( struct brw_wm_compile *c ); - -void brw_wm_print_value( struct brw_wm_compile *c, - struct brw_wm_value *value ); - -void brw_wm_print_ref( struct brw_wm_compile *c, - struct brw_wm_ref *ref ); - -void brw_wm_print_insn( struct brw_wm_compile *c, - struct brw_wm_instruction *inst ); - -void brw_wm_print_program( struct brw_wm_compile *c, - const char *stage ); void brw_wm_lookup_iz( unsigned line_aa, unsigned lookup, struct brw_wm_prog_key *key ); -#if 0 -boolean brw_wm_is_glsl(struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_wm_compile *c); -#endif +void brw_wm_emit_decls(struct brw_wm_compile *c); #endif diff --git a/src/mesa/pipe/i965simple/brw_wm_decl.c b/src/mesa/pipe/i965simple/brw_wm_decl.c new file mode 100644 index 0000000000..392f17fad6 --- /dev/null +++ b/src/mesa/pipe/i965simple/brw_wm_decl.c @@ -0,0 +1,377 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + + +static int is_null( struct brw_reg reg ) +{ + return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.nr == BRW_ARF_NULL); +} + +static void emit_pixel_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_xy[0])) { + + struct brw_compile *p = &c->func; + struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + + c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + brw_ADD(p, + c->pixel_xy[0], + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + + brw_ADD(p, + c->pixel_xy[1], + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } +} + + + + + + +static void emit_delta_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->delta_xy[0])) { + struct brw_compile *p = &c->func; + struct brw_reg r1 = brw_vec1_grf(1, 0); + + emit_pixel_xy(c); + + c->delta_xy[0] = alloc_tmp(c); + c->delta_xy[1] = alloc_tmp(c); + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + brw_ADD(p, + c->delta_xy[0], + retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + + brw_ADD(p, + c->delta_xy[1], + retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + } +} + + + +#if 0 +static void emit_pixel_w( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_w)) { + struct brw_compile *p = &c->func; + + struct brw_reg interp_wpos = c->coef_wpos; + + c->pixel_w = alloc_tmp(c); + + emit_delta_xy(c); + + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); + brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); + + /* Calc w */ + brw_math_16( p, + c->pixel_w, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} +#endif + + +static void emit_cinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_linterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + emit_delta_xy(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + } + } +} + +#if 0 +static void emit_pinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + get_delta_xy(c); + get_pixel_w(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + brw_MUL(p, dst, dst, c->pixel_w); + } + } +} +#endif + + + +#if 0 +static void emit_wpos( ) +{ + struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); + struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + struct tgsi_full_src_register deltas = get_delta_xy(c); + struct tgsi_full_src_register arg2; + unsigned opcode; + + opcode = WM_LINTERP; + arg2 = src_undef(); + + /* Have to treat wpos.xy specially: + */ + emit_op(c, + WM_WPOSXY, + dst_mask(dst, WRITEMASK_XY), + 0, 0, 0, + get_pixel_xy(c), + src_undef(), + src_undef()); + + dst = dst_mask(dst, WRITEMASK_ZW); + + /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op(c, + WM_LINTERP, + dst, + 0, 0, 0, + interp, + deltas, + arg2); +} +#endif + + + + +/* Perform register allocation: + * + * -- r0??? + * -- passthrough depth regs (and stencil/aa??) + * -- curbe ?? + * -- inputs (coefficients) + * + * Use a totally static register allocation. This will perform poorly + * but is an easy way to get started (again). + */ +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + int nr_curbe_regs = 0; + + /* R0, then some depth related regs: + */ + for (i = 0; i < c->key.nr_depth_regs; i++) { + c->payload_depth[i] = brw_vec8_grf(i*2, 0); + c->reg_index += 2; + } + + + /* Then a copy of our part of the CURBE entry: + */ + { + int nr_constants = c->fp->info.nr_regs[TGSI_FILE_CONSTANT]; + int index = 0; + + c->prog_data.max_const = 4*nr_constants; + for (i = 0; i < nr_constants; i++) { + for (j = 0; j < 4; j++, index++) + c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, + index%8); + } + + nr_curbe_regs = 2*((4*nr_constants+15)/16); + c->reg_index += nr_curbe_regs; + } + + /* Next we receive the plane coefficients for parameter + * interpolation: + */ + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) { + c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + } + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = c->fp->program.num_inputs * 2; + c->prog_data.curb_read_length = nr_curbe_regs; + + /* That's the end of the payload, now we can start allocating registers. + */ + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index++; + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index += 2; + + /* Now allocate room for the interpolated inputs and staging + * registers for the outputs: + */ + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_OUTPUT]; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + /* Beyond this we should only need registers for internal temporaries: + */ + c->tmp_start = c->reg_index; +} + + + + + +/* Need to interpolate fragment program inputs in as a preamble to the + * shader. A more sophisticated compiler would do this on demand, but + * we'll do it up front: + */ +void brw_wm_emit_decls(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + int done = 0; + + prealloc_reg(c); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned first = decl->u.DeclarationRange.First; + unsigned last = decl->u.DeclarationRange.Last; + unsigned mask = decl->Declaration.UsageMask; /* ? */ + unsigned i; + + if (decl->Declaration.File != TGSI_FILE_INPUT) + break; + + assert(decl->Declaration.Interpolate); + + for( i = first; i <= last; i++ ) { + switch (decl->Interpolation.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + emit_cinterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_linterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + //emit_pinterp(c, i, mask); + emit_linterp(c, i, mask); + break; + } + } + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + + release_tmps(c); +} diff --git a/src/mesa/pipe/i965simple/brw_wm_glsl.c b/src/mesa/pipe/i965simple/brw_wm_glsl.c index 90e73a605a..d6dfaed826 100644 --- a/src/mesa/pipe/i965simple/brw_wm_glsl.c +++ b/src/mesa/pipe/i965simple/brw_wm_glsl.c @@ -2,753 +2,437 @@ #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" -#if 0 - -/* Only guess, need a flag in gl_fragment_program later */ -boolean brw_wm_is_glsl(struct gl_fragment_program *fp) -{ - int i; - for (i = 0; i < fp->Base.NumInstructions; i++) { - struct prog_instruction *inst = &fp->Base.Instructions[i]; - switch (inst->Opcode) { - case OPCODE_IF: - case OPCODE_INT: - case OPCODE_ENDIF: - case OPCODE_CAL: - case OPCODE_BRK: - case OPCODE_RET: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_BGNLOOP: - return TRUE; - default: - break; - } - } - return FALSE; -} -static void set_reg(struct brw_wm_compile *c, int file, int index, - int component, struct brw_reg reg) -{ - c->wm_regs[file][index][component].reg = reg; - c->wm_regs[file][index][component].inited = TRUE; -} -static int get_scalar_dst_index(struct prog_instruction *inst) +static int get_scalar_dst_index(struct tgsi_full_instruction *inst) { - int i; - for (i = 0; i < 4; i++) - if (inst->DstReg.WriteMask & (1<FullDstRegisters[0].DstRegister; + int i; + for (i = 0; i < 4; i++) + if (dst.WriteMask & (1<tmp_index--, 0); - return reg; + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); } static void release_tmps(struct brw_wm_compile *c) { - c->tmp_index = 127; + c->tmp_index = 0; } + static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, unsigned neg, unsigned abs) +get_reg(struct brw_wm_compile *c, int file, int index, int component ) { - struct brw_reg reg; - switch (file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - file = PROGRAM_STATE_VAR; - break; - case PROGRAM_UNDEFINED: - return brw_null_reg(); - default: - break; - } - - if(c->wm_regs[file][index][component].inited) - reg = c->wm_regs[file][index][component].reg; - else - reg = brw_vec8_grf(c->reg_index, 0); - - if(!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; - } - - if (neg & (1<< component)) { - reg = negate(reg); - } - if (abs) - reg = brw_abs(reg); - return reg; + switch (file) { + case TGSI_FILE_NULL: + return brw_null_reg(); + + case TGSI_FILE_SAMPLER: + /* Should never get here: + */ + assert (0); + return brw_null_reg(); + + case TGSI_FILE_IMMEDIATE: + /* These need a different path: + */ + assert(0); + return brw_null_reg(); + + + case TGSI_FILE_CONSTANT: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + return c->wm_regs[file][index][component]; + + default: + assert(0); + return brw_null_reg(); + } } -static void prealloc_reg(struct brw_wm_compile *c) -{ - int i, j; - struct brw_reg reg; - int nr_interp_regs = 0; - unsigned inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; - - for (i = 0; i < 4; i++) { - reg = (i < c->key.nr_depth_regs) - ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); - } - c->reg_index += 2*c->key.nr_depth_regs; - { - int nr_params = c->fp->program.Base.Parameters->NumParameters; - struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - c->prog_data.nr_params = 4*nr_params; - for (i = 0; i < nr_params; i++) { - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index/8, - index%8); - c->prog_data.param[index] = - &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - c->nr_creg = 2*((4*nr_params+15)/16); - c->reg_index += c->nr_creg; - } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (inputs & (1<reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - c->reg_index += 2; - - } - } - c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = nr_interp_regs * 2; - c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; -} static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct prog_instruction *inst, int component, int nr) -{ - return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, - 0, 0); -} - -static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct prog_src_register *src, int index, int nr) + struct tgsi_full_instruction *inst, + int component) { - int component = GET_SWZ(src->Swizzle, index); - return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + return get_reg(c, + inst->FullDstRegisters[0].DstRegister.File, + inst->FullDstRegisters[0].DstRegister.Index, + component); } -static void emit_abs( struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (inst->DstReg.WriteMask & (1<SrcReg[0], i, 1); - brw_MOV(p, dst, brw_abs(src)); - } - } - brw_set_saturate(p, 0); -} - -static void emit_int( struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - brw_RNDD(p, dst, src); - } - } - brw_set_saturate(p, 0); -} - -static void emit_mov( struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - brw_MOV(p, dst, src); - } - } - brw_set_saturate(p, 0); -} - -static void emit_pixel_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); - - struct brw_reg dst0, dst1; - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - /* Calculate pixel centers by adding 1 or 0 to each of the - * micro-tile coordinates passed in r1. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 4), 2, 4, 0), - brw_imm_v(0x10101010)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 5), 2, 4, 0), - brw_imm_v(0x11001100)); - } - -} - -static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) +static int get_swz( struct tgsi_src_register src, int index ) { - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg dst0, dst1, src0, src1; - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst0, - retype(src0, BRW_REGISTER_TYPE_UW), - negate(r1)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst1, - retype(src1, BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); - - } - + switch (index & 3) { + case 0: return src.SwizzleX; + case 1: return src.SwizzleY; + case 2: return src.SwizzleZ; + case 3: return src.SwizzleW; + default: return 0; + } } - -static void fire_fb_write( struct brw_wm_compile *c, - unsigned base_reg, - unsigned nr ) +static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) { - struct brw_compile *p = &c->func; - - /* Pass through control information: - */ - /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ - brw_MOV(p, - brw_message_reg(base_reg + 1), - brw_vec8_grf(1, 0)); - brw_pop_insn_state(p); - } - /* Send framebuffer write message: */ - brw_fb_WRITE(p, - retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), - base_reg, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - 0, /* render surface always 0 */ - nr, - 0, - 1); + switch (index & 3) { + case 0: return src.ExtSwizzleX; + case 1: return src.ExtSwizzleY; + case 2: return src.ExtSwizzleZ; + case 3: return src.ExtSwizzleW; + default: return 0; + } } -static void emit_fb_write(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - int nr = 2; - int channel; - struct brw_reg src0;//, src1, src2, dst; - - /* Reserve a space for AA - may not be needed: - */ - if (c->key.aa_dest_stencil_reg) - nr += 1; - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); - } - fire_fb_write(c, 0, nr); -} - -static void emit_pixel_w( struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - if (mask & WRITEMASK_W) { - struct brw_reg dst, src0, delta0, delta1; - struct brw_reg interp3; - - dst = get_dst_reg(c, inst, 3, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - - interp3 = brw_vec1_grf(src0.nr+1, 4); - /* Calc 1/w - just linterp wpos[3] optimized by putting the - * result straight into a message reg. - */ - brw_LINE(p, brw_null_reg(), interp3, delta0); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); - - /* Calc w */ - brw_math_16( p, dst, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); - } -} - -static void emit_linterp(struct brw_wm_compile *c, - struct prog_instruction *inst) +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + struct tgsi_full_src_register *src, + int index) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0; - - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - unsigned nr = src0.nr; - int i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<SrcRegister.Negate) + neg = 1; + + component = get_swz(src->SrcRegister, component); + + /* Yes, there are multiple negates: + */ + switch (component & 3) { + case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break; + case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break; + case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break; + case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break; + } + + /* And multiple swizzles, fun isn't it: + */ + component = get_ext_swz(src->SrcRegisterExtSwz, component); + + /* Can't handle this, don't know if we need to: + */ + assert(src->SrcRegisterExtSwz.ExtDivide == 0); + + /* Not handling indirect lookups yet: + */ + assert(src->SrcRegister.Indirect == 0); + + /* Don't know what dimension means: + */ + assert(src->SrcRegister.Dimension == 0); + + /* Will never handle any of this stuff: + */ + assert(src->SrcRegisterExtMod.Complement == 0); + assert(src->SrcRegisterExtMod.Bias == 0); + assert(src->SrcRegisterExtMod.Scale2X == 0); + + if (src->SrcRegisterExtMod.Absolute) + abs = 1; + + /* Another negate! This is a post-absolute negate, which we + * can't do. Need to clean the crap out of tgsi somehow. + */ + assert(src->SrcRegisterExtMod.Negate == 0); + + switch( component ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + reg = get_reg(c, + src->SrcRegister.File, + src->SrcRegister.Index, + component ); + + if (neg) + reg = negate(reg); + + if (abs) + reg = brw_abs(reg); + + break; + + /* XXX: this won't really work in the general case, but we know + * that the extended swizzle is only allowed in the SWZ + * instruction (right??), in which case using an immediate + * directly will work. + */ + case TGSI_EXTSWIZZLE_ZERO: + reg = brw_imm_f(0); + break; + + case TGSI_EXTSWIZZLE_ONE: + if (neg && !abs) + reg = brw_imm_f(-1.0); + else + reg = brw_imm_f(1.0); + break; + + default: + assert(0); + break; + } + + + return reg; } -static void emit_cinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_abs( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, src0; - - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - unsigned nr = src0.nr; - int i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<FullDstRegisters[0].DstRegister.WriteMask; + + int i; + struct brw_compile *p = &c->func; + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + brw_MOV(p, dst, brw_abs(src)); /* NOTE */ + } + } + brw_set_saturate(p, 0); } -static void emit_pinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0, w; - - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - w = get_src_reg(c, &inst->SrcReg[2], 3, 1); - unsigned nr = src0.nr; - int i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - unsigned mask = inst->DstReg.WriteMask; - for (i = 0; i < 4; i++) { - unsigned i2 = (i+2)%3; - unsigned i1 = (i+1)%3; - if (mask & (1<SrcReg[0], i2, 1)); - src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); - brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - brw_MAC(p, dst, src0, src1); - brw_set_saturate(p, 0); - } - } - brw_set_saturate(p, 0); + int i; + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + for (i = 0; i < 4; i++) { + unsigned i2 = (i+2)%3; + unsigned i1 = (i+1)%3; + if (mask & (1<FullSrcRegisters[0], i2)); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1); + brw_MUL(p, brw_null_reg(), src0, src1); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); } static void emit_dp3(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_reg src0[3], src1[3], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); - } - - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, 0); + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); } static void emit_dp4(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); - } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, brw_null_reg(), src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); } static void emit_dph(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); - } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_ADD(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); } static void emit_math1(struct brw_wm_compile *c, - struct prog_instruction *inst, unsigned func) + struct tgsi_full_instruction *inst, unsigned func) { - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - brw_MOV(p, brw_message_reg(2), src0); - brw_math(p, + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, dst, func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + ((inst->Instruction.Saturate != TGSI_SAT_NONE) + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } -static void emit_rcp(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_INV); -} -static void emit_rsq(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_alu2(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) { - emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_alu2(p, opcode, dst, src0, src1); + } + } + brw_set_saturate(p, 0); } -static void emit_sin(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); -} -static void emit_cos(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_alu1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) { - emit_math1(c, inst, BRW_MATH_FUNCTION_COS); + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + brw_alu1(p, opcode, dst, src0); + } + } + if (inst->Instruction.Saturate != TGSI_SAT_NONE) + brw_set_saturate(p, 0); } -static void emit_ex2(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); -} - -static void emit_lg2(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); -} - -static void emit_add(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_ADD(p, dst, src0, src1); - } - } - brw_set_saturate(p, 0); -} - -static void emit_sub(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_ADD(p, dst, src0, negate(src1)); - } - } - brw_set_saturate(p, 0); -} - -static void emit_mul(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_MUL(p, dst, src0, src1); - } - } - brw_set_saturate(p, 0); -} - -static void emit_frc(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - brw_FRC(p, dst, src0); - } - } - if (inst->SaturateMode != SATURATE_OFF) - brw_set_saturate(p, 0); -} - -static void emit_flr(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - brw_RNDD(p, dst, src0); - } - } - brw_set_saturate(p, 0); -} static void emit_max(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); } static void emit_min(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); } static void emit_pow(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0); - brw_MOV(p, brw_message_reg(2), src0); - brw_MOV(p, brw_message_reg(3), src1); + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); - brw_math(p, + brw_math(p, dst, BRW_MATH_FUNCTION_POW, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + (inst->Instruction.Saturate != TGSI_SAT_NONE + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, @@ -756,601 +440,636 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg dst, tmp1, tmp2, src0, src1, src2; - int i; - for (i = 0; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - - if (src1.nr == dst.nr) { - tmp1 = alloc_tmp(c); - brw_MOV(p, tmp1, src1); - } else - tmp1 = src1; - - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); - if (src2.nr == dst.nr) { - tmp2 = alloc_tmp(c); - brw_MOV(p, tmp2, src2); - } else - tmp2 = src2; - - brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); - brw_MUL(p, brw_null_reg(), dst, tmp2); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0, tmp1); - brw_set_saturate(p, 0); - } - release_tmps(c); - } + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c); + } } static void emit_kil(struct brw_wm_compile *c) { - struct brw_compile *p = &c->func; - struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK - brw_AND(p, depth, c->emit_mask_reg, depth); - brw_pop_insn_state(p); + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); } static void emit_mad(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1, src2; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); - brw_MUL(p, dst, src0, src1); - - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src2); - brw_set_saturate(p, 0); - } - } + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } } static void emit_sop(struct brw_wm_compile *c, - struct prog_instruction *inst, unsigned cond) + struct tgsi_full_instruction *inst, unsigned cond) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1; - int i; - - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_CMP(p, brw_null_reg(), cond, src0, src1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(0.0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(1.0)); - } - } - brw_pop_insn_state(p); + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_pop_insn_state(p); } -static void emit_slt(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_L); -} -static void emit_sle(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_ddx(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - emit_sop(c, inst, BRW_CONDITIONAL_LE); + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + nr = src0.nr; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + nr = src0.nr; + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned i; + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); -static void emit_seq(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_EQ); -} +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + } +#else + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); +#endif -static void emit_sne(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_NEQ); + brw_MOV(p, brw_message_reg(5), src[3]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 4, + 4, + 0); +#endif } -static void emit_ddx(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_tex(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - unsigned nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned msg_len; + unsigned i, nr; + unsigned emit; + boolean shadow = (c->key.shadowtex_mask & (1<TexSrcUnit)) ? 1 : 0; + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); -static void emit_ddy(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - unsigned nr, i; - - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - nr = src0.nr; - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } +#else + emit = WRITEMASK_XY; + nr = 2; +#endif -static void emit_wpos_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg src0[2], dst[2]; - - dst[0] = get_dst_reg(c, inst, 0, 1); - dst[1] = get_dst_reg(c, inst, 1, 1); - - src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); - - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_MOV(p, - dst[0], - retype(src0[0], BRW_REGISTER_TYPE_UW)); - } - - if (mask & WRITEMASK_Y) { - /* TODO -- window_height - Y */ - brw_MOV(p, - dst[1], - retype(src0[1], BRW_REGISTER_TYPE_UW)); - - } + msg_len = 1; + + for (i = 0; i < nr; i++) { + static const unsigned swz[4] = {0,1,2,2}; + if (emit & (1<TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, + 4, + shadow ? 6 : 4, + 0); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +#endif } -/* TODO - BIAS on SIMD8 not workind yet... - */ -static void emit_txb(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - unsigned i; - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - default: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), src[2]); - break; - } - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - inst->TexSrcUnit + 1, /* surface */ - inst->TexSrcUnit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); -} -static void emit_tex(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - unsigned msg_len; - unsigned i, nr; - unsigned emit; - boolean shadow = (c->key.shadowtex_mask & (1<TexSrcUnit)) ? 1 : 0; - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; - nr = 1; - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; - nr = 2; - break; - default: - emit = WRITEMASK_XYZ; - nr = 3; - break; - } - msg_len = 1; - - for (i = 0; i < nr; i++) { - static const unsigned swz[4] = {0,1,2,2}; - if (emit & (1<TexSrcUnit + 1, /* surface */ - inst->TexSrcUnit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); - - if (shadow) - brw_MOV(p, dst[3], brw_imm_f(1.0)); -} -static void post_wm_emit( struct brw_wm_compile *c ) + +static void emit_fb_write(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - unsigned nr_insns = c->fp->program.Base.NumInstructions; - unsigned insn, target_insn; - struct prog_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->fp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case OPCODE_CAL: - target_insn = inst1->BranchTarget; - inst2 = &c->fp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } - } + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + int base_reg = 0; + + // src0 = output color + // src1 = payload_depth[0] + // src2 = output depth + // dst = ??? + + + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + { + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel]; + + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + } + + + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* render surface always 0 */ + nr, + 0, + 1); + } -static void brw_wm_emit_glsl(struct brw_wm_compile *c) +static void brw_wm_emit_instruction( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst ) { -#define MAX_IFSN 32 -#define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; - struct brw_instruction *inst0, *inst1; - int i, if_insn = 0, loop_insn = 0; - struct brw_compile *p = &c->func; - struct brw_indirect stack_index = brw_indirect(0, 0); - - brw_init_compile(&c->func); - c->reg_index = 0; - prealloc_reg(c); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); - - for (i = 0; i < c->nr_fp_insns; i++) { - struct prog_instruction *inst = &c->prog_instructions[i]; - struct prog_instruction *orig_inst; - - if ((orig_inst = inst->Data) != 0) - orig_inst->Data = current_insn(p); - - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - - switch (inst->Opcode) { - case WM_PIXELXY: - emit_pixel_xy(c, inst); - break; - case WM_DELTAXY: - emit_delta_xy(c, inst); - break; - case WM_PIXELW: - emit_pixel_w(c, inst); - break; - case WM_LINTERP: - emit_linterp(c, inst); - break; - case WM_PINTERP: - emit_pinterp(c, inst); - break; - case WM_CINTERP: - emit_cinterp(c, inst); - break; - case WM_WPOSXY: - emit_wpos_xy(c, inst); - break; - case WM_FB_WRITE: - emit_fb_write(c, inst); - break; - case OPCODE_ABS: - emit_abs(c, inst); - break; - case OPCODE_ADD: - emit_add(c, inst); - break; - case OPCODE_SUB: - emit_sub(c, inst); - break; - case OPCODE_FRC: - emit_frc(c, inst); - break; - case OPCODE_FLR: - emit_flr(c, inst); - break; - case OPCODE_LRP: - emit_lrp(c, inst); - break; - case OPCODE_INT: - emit_int(c, inst); - break; - case OPCODE_MOV: - emit_mov(c, inst); - break; - case OPCODE_DP3: - emit_dp3(c, inst); - break; - case OPCODE_DP4: - emit_dp4(c, inst); - break; - case OPCODE_XPD: - emit_xpd(c, inst); - break; - case OPCODE_DPH: - emit_dph(c, inst); - break; - case OPCODE_RCP: - emit_rcp(c, inst); - break; - case OPCODE_RSQ: - emit_rsq(c, inst); - break; - case OPCODE_SIN: - emit_sin(c, inst); - break; - case OPCODE_COS: - emit_cos(c, inst); - break; - case OPCODE_EX2: - emit_ex2(c, inst); - break; - case OPCODE_LG2: - emit_lg2(c, inst); - break; - case OPCODE_MAX: - emit_max(c, inst); - break; - case OPCODE_MIN: - emit_min(c, inst); - break; - case OPCODE_DDX: - emit_ddx(c, inst); - break; - case OPCODE_DDY: - emit_ddy(c, inst); - break; - case OPCODE_SLT: - emit_slt(c, inst); - break; - case OPCODE_SLE: - emit_sle(c, inst); - break; - case OPCODE_SGT: - emit_sgt(c, inst); - break; - case OPCODE_SGE: - emit_sge(c, inst); - break; - case OPCODE_SEQ: - emit_seq(c, inst); - break; - case OPCODE_SNE: - emit_sne(c, inst); - break; - case OPCODE_MUL: - emit_mul(c, inst); - break; - case OPCODE_POW: - emit_pow(c, inst); - break; - case OPCODE_MAD: - emit_mad(c, inst); - break; - case OPCODE_TEX: - emit_tex(c, inst); - break; - case OPCODE_TXB: - emit_txb(c, inst); - break; - case OPCODE_KIL_NV: - emit_kil(c); - break; - case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); - break; - case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); - break; - case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); - break; - case OPCODE_BGNSUB: - case OPCODE_ENDSUB: - break; - case OPCODE_CAL: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - orig_inst = inst->Data; - orig_inst->Data = &p->store[p->nr_insn]; - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_pop_insn_state(p); - break; - - case OPCODE_RET: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_pop_insn_state(p); - - break; - case OPCODE_BGNLOOP: - loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); - break; - case OPCODE_BRK: - brw_BREAK(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_CONT: - brw_CONT(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_ENDLOOP: - loop_insn--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); - /* patch all the BREAK instructions from - last BEGINLOOP */ - while (inst0 > loop_inst[loop_insn]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { - inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; - inst0->bits3.if_else.pop_count = 0; - } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { - inst0->bits3.if_else.jump_count = inst1 - inst0; - inst0->bits3.if_else.pop_count = 0; - } - } - break; - default: - _mesa_printf("unsupported IR in fragment shader %d\n", - inst->Opcode); - } - if (inst->CondUpdate) - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - else - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - post_wm_emit(c); - for (i = 0; i < c->fp->program.Base.NumInstructions; i++) - c->fp->program.Base.Instructions[i].Data = NULL; + struct brw_compile *p = &c->func; + +#if 0 + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#endif + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + emit_abs(c, inst); + break; + case TGSI_OPCODE_ADD: + emit_alu2(c, inst, BRW_OPCODE_ADD); + break; + case TGSI_OPCODE_SUB: + assert(0); +// emit_alu2(c, inst, BRW_OPCODE_SUB); + break; + case TGSI_OPCODE_FRC: + emit_alu1(c, inst, BRW_OPCODE_FRC); + break; + case TGSI_OPCODE_FLR: + assert(0); +// emit_alu1(c, inst, BRW_OPCODE_FLR); + break; + case TGSI_OPCODE_LRP: + emit_lrp(c, inst); + break; + case TGSI_OPCODE_INT: + emit_alu1(c, inst, BRW_OPCODE_RNDD); + break; + case TGSI_OPCODE_MOV: + emit_alu1(c, inst, BRW_OPCODE_MOV); + break; + case TGSI_OPCODE_DP3: + emit_dp3(c, inst); + break; + case TGSI_OPCODE_DP4: + emit_dp4(c, inst); + break; + case TGSI_OPCODE_XPD: + emit_xpd(c, inst); + break; + case TGSI_OPCODE_DPH: + emit_dph(c, inst); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); + break; + case TGSI_OPCODE_SIN: + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); + break; + case TGSI_OPCODE_COS: + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); + break; + case TGSI_OPCODE_MAX: + emit_max(c, inst); + break; + case TGSI_OPCODE_MIN: + emit_min(c, inst); + break; + case TGSI_OPCODE_DDX: + emit_ddx(c, inst); + break; + case TGSI_OPCODE_DDY: + emit_ddy(c, inst); + break; + case TGSI_OPCODE_SLT: + emit_sop(c, inst, BRW_CONDITIONAL_L); + break; + case TGSI_OPCODE_SLE: + emit_sop(c, inst, BRW_CONDITIONAL_LE); + break; + case TGSI_OPCODE_SGT: + emit_sop(c, inst, BRW_CONDITIONAL_G); + break; + case TGSI_OPCODE_SGE: + emit_sop(c, inst, BRW_CONDITIONAL_GE); + break; + case TGSI_OPCODE_SEQ: + emit_sop(c, inst, BRW_CONDITIONAL_EQ); + break; + case TGSI_OPCODE_SNE: + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); + break; + case TGSI_OPCODE_MUL: + emit_alu2(c, inst, BRW_OPCODE_MUL); + break; + case TGSI_OPCODE_POW: + emit_pow(c, inst); + break; + case TGSI_OPCODE_MAD: + emit_mad(c, inst); + break; + case TGSI_OPCODE_TEX: + emit_tex(c, inst); + break; + case TGSI_OPCODE_TXB: + emit_txb(c, inst); + break; + case TGSI_OPCODE_TEXKILL: + emit_kil(c); + break; + case TGSI_OPCODE_IF: + assert(c->if_insn < MAX_IFSN); + c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(c->if_insn > 0); + brw_ENDIF(p, c->if_inst[--c->if_insn]); + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + case TGSI_OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(4)); +// orig_inst = inst->Data; +// orig_inst->Data = &p->store[p->nr_insn]; + assert(0); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case TGSI_OPCODE_RET: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); + + break; + case TGSI_OPCODE_LOOP: + c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_ENDLOOP: + c->loop_insn--; + c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); + /* patch all the BREAK instructions from + last BEGINLOOP */ + while (c->inst0 > c->loop_inst[c->loop_insn]) { + c->inst0--; + if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1; + c->inst0->bits3.if_else.pop_count = 0; + } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0; + c->inst0->bits3.if_else.pop_count = 0; + } + } + break; + case TGSI_OPCODE_END: + emit_fb_write(c, inst); + break; + + default: + _mesa_printf("unsupported IR in fragment shader %d\n", + inst->Instruction.Opcode); + } +#if 0 + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif } + + + + + void brw_wm_glsl_emit(struct brw_wm_compile *c) { - brw_wm_pass_fp(c); - c->tmp_index = 127; - brw_wm_emit_glsl(c); - c->prog_data.total_grf = c->reg_index; - c->prog_data.total_scratch = 0; -} + struct tgsi_parse_context parse; + struct brw_compile *p = &c->func; + + brw_init_compile(&c->func); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + c->reg_index = 0; + c->if_insn = 0; + c->loop_insn = 0; + c->stack_index = brw_indirect(0,0); + + /* Do static register allocation and parameter interpolation: + */ + brw_wm_emit_decls( c ); + + /* Emit the actual program. All done with very direct translation, + * hopefully we can improve on this shortly... + */ + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* already done */ + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* not handled yet */ + assert(0); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free (&parse); + + /* Fix up call targets: + */ +#if 0 + { + unsigned nr_insns = c->fp->program.Base.NumInstructions; + unsigned insn, target_insn; + struct tgsi_full_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->fp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + target_insn = inst1->BranchTarget; + inst2 = &c->fp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } + } #endif + + c->prog_data.total_grf = c->reg_index; + c->prog_data.total_scratch = 0; +} diff --git a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c index 4ea0dd7db0..fbeea8c809 100644 --- a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c +++ b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c @@ -229,12 +229,12 @@ static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_samp * complicates various things. However, this is still too confusing - * FIXME: simplify all the different new texture state flags. */ -void brw_upload_wm_samplers(struct brw_context *brw) +static void upload_wm_samplers(struct brw_context *brw) { unsigned unit; unsigned sampler_count = 0; - /* _NEW_TEXTURE */ + /* BRW_NEW_SAMPLER */ for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { if (brw->attribs.Samplers[unit]) { /* FIXME: correctly detect enabled ones */ const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; @@ -262,14 +262,11 @@ void brw_upload_wm_samplers(struct brw_context *brw) sizeof(struct brw_sampler_state) * brw->wm.sampler_count); } -#if 0 const struct brw_tracked_state brw_wm_samplers = { .dirty = { - .mesa = _NEW_TEXTURE, - .brw = 0, + .brw = BRW_NEW_SAMPLER, .cache = 0 }, .update = upload_wm_samplers }; -#endif diff --git a/src/mesa/pipe/i965simple/brw_wm_state.c b/src/mesa/pipe/i965simple/brw_wm_state.c index b45fb2f56b..52d2c85423 100644 --- a/src/mesa/pipe/i965simple/brw_wm_state.c +++ b/src/mesa/pipe/i965simple/brw_wm_state.c @@ -34,15 +34,13 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_wm.h" +#include "pipe/p_util.h" /*********************************************************************** * WM unit - fragment programs and rasterization */ - -#if 0 static void upload_wm_unit(struct brw_context *brw ) { - struct intel_context *intel = &brw->intel; struct brw_wm_unit_state wm; unsigned max_threads; unsigned per_thread; @@ -56,7 +54,7 @@ static void upload_wm_unit(struct brw_context *brw ) memset(&wm, 0, sizeof(wm)); /* CACHE_NEW_WM_PROG */ - wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1; + wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1; wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; @@ -64,9 +62,10 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.max_threads = max_threads; - per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024); + per_thread = align(brw->wm.prog_data->total_scratch, 1024); assert(per_thread <= 12 * 1024); +#if 0 if (brw->wm.prog_data->total_scratch) { unsigned total = per_thread * (max_threads + 1); @@ -95,6 +94,7 @@ static void upload_wm_unit(struct brw_context *brw ) * so just fail for now if we hit that path. */ assert(brw->wm.prog_data->total_scratch == 0); +#endif /* CACHE_NEW_SURFACE */ wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; @@ -112,23 +112,20 @@ static void upload_wm_unit(struct brw_context *brw ) /* BRW_NEW_FRAGMENT_PROGRAM */ { - const struct gl_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_program *fp = brw->attribs.FragmentProgram; - if (fp->Base.InputsRead & (1<UsesDepth) wm.wm5.program_uses_depth = 1; /* as far as we can tell */ - if (fp->Base.OutputsWritten & (1<ComputesDepth) wm.wm5.program_computes_depth = 1; - /* _NEW_COLOR */ + /* BRW_NEW_ALPHA_TEST */ if (fp->UsesKill || - brw->attribs.Color->AlphaEnabled) + brw->attribs.AlphaTest->enabled) wm.wm5.program_uses_killpixel = 1; - if (brw_wm_is_glsl(fp)) - wm.wm5.enable_8_pix = 1; - else - wm.wm5.enable_16_pix = 1; + wm.wm5.enable_8_pix = 1; } wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ @@ -138,11 +135,11 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.line_aa_region_width = 0; wm.wm5.line_endcap_aa_region_width = 1; - /* _NEW_POLYGONSTIPPLE */ - if (brw->attribs.Polygon->StippleFlag) + /* BRW_NEW_RASTERIZER */ + if (brw->attribs.Raster->poly_stipple_enable) wm.wm5.polygon_stipple = 1; - /* _NEW_POLYGON */ +#if 0 if (brw->attribs.Polygon->OffsetFill) { wm.wm5.depth_offset = 1; /* Something wierd going on with legacy_global_depth_bias, @@ -156,13 +153,13 @@ static void upload_wm_unit(struct brw_context *brw ) */ wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; } +#endif - /* _NEW_LINE */ - if (brw->attribs.Line->StippleFlag) { + if (brw->attribs.Raster->line_stipple_enable) { wm.wm5.line_stipple = 1; } - if (BRW_DEBUG & DEBUG_STATS || intel->stats_wm) + if (BRW_DEBUG & DEBUG_STATS) wm.wm4.stats_enable = 1; brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); @@ -183,14 +180,10 @@ static void upload_wm_unit(struct brw_context *brw ) const struct brw_tracked_state brw_wm_unit = { .dirty = { - .mesa = (_NEW_POLYGON | - _NEW_POLYGONSTIPPLE | - _NEW_LINE | - _NEW_COLOR), - - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_CURBE_OFFSETS | - BRW_NEW_LOCK), + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_ALPHA_TEST | + BRW_NEW_FS | + BRW_NEW_CURBE_OFFSETS), .cache = (CACHE_NEW_SURFACE | CACHE_NEW_WM_PROG | @@ -199,4 +192,3 @@ const struct brw_tracked_state brw_wm_unit = { .update = upload_wm_unit }; -#endif diff --git a/src/mesa/pipe/p_util.h b/src/mesa/pipe/p_util.h index e6d284d932..46edcf3075 100644 --- a/src/mesa/pipe/p_util.h +++ b/src/mesa/pipe/p_util.h @@ -360,6 +360,11 @@ static INLINE float LOG2(float val) #define CEILF(x) ((float) ceil(x)) #endif +static INLINE int align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + /* Convenient... */ extern void _mesa_printf(const char *str, ...); diff --git a/src/mesa/pipe/softpipe/sp_texture.c b/src/mesa/pipe/softpipe/sp_texture.c index 2dd1add6f7..44512e4281 100644 --- a/src/mesa/pipe/softpipe/sp_texture.c +++ b/src/mesa/pipe/softpipe/sp_texture.c @@ -52,10 +52,6 @@ static unsigned minify( unsigned d ) return MAX2(1, d>>1); } -static int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} static void -- cgit v1.2.3