diff options
author | Ben Skeggs <skeggsb@gmail.com> | 2007-12-15 09:48:11 +1100 |
---|---|---|
committer | Ben Skeggs <skeggsb@gmail.com> | 2007-12-15 09:48:11 +1100 |
commit | b5b9ac62e6a9667de4bb2078d5cd3199fe25f619 (patch) | |
tree | 5c3f389d10132cd2c9733c5c0f3d81a657298f53 | |
parent | 7f89c776e19b400c0adf647fc9dfb392efe88dbd (diff) | |
parent | 017f862de1f857bca29f09794539aaf411014f13 (diff) |
Merge branch 'upstream-gallium-0.1' into darktama-gallium-0.1
62 files changed, 2325 insertions, 3929 deletions
diff --git a/progs/vp/vp-tris.c b/progs/vp/vp-tris.c index e5be65e78c..f9e6cdad74 100644 --- a/progs/vp/vp-tris.c +++ b/progs/vp/vp-tris.c @@ -90,7 +90,9 @@ static void Init( void ) } fprintf(stderr, "%.*s\n", sz, buf); - + + glEnable(GL_VERTEX_PROGRAM_NV); + glGenProgramsARB(1, &prognum); glBindProgramARB(GL_VERTEX_PROGRAM_ARB, prognum); @@ -168,8 +170,6 @@ static void Display( void ) glClearColor(0.3, 0.3, 0.3, 1); glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); - glEnable(GL_VERTEX_PROGRAM_NV); - glBegin(GL_TRIANGLES); diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index efb3b35f6a..d866d10017 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -936,10 +936,16 @@ static void load_texture( struct texenv_fragment_program *p, GLuint unit ) /* TODO: Use D0_MASK_XY where possible. */ - if (p->state->unit[unit].enabled) + if (p->state->unit[unit].enabled) { p->src_texture[unit] = emit_texld( p, OPCODE_TXP, tmp, WRITEMASK_XYZW, unit, dim, texcoord ); + p->program->Base.SamplersUsed |= (1 << unit); + /* This identity mapping should already be in place + * (see _mesa_init_program_struct()) but let's be safe. + */ + p->program->Base.SamplerUnits[unit] = unit; + } else p->src_texture[unit] = get_zero(p); } diff --git a/src/mesa/pipe/i915simple/i915_fpc_translate.c b/src/mesa/pipe/i915simple/i915_fpc_translate.c index 1cd554250c..d517b88acc 100644 --- a/src/mesa/pipe/i915simple/i915_fpc_translate.c +++ b/src/mesa/pipe/i915simple/i915_fpc_translate.c @@ -928,8 +928,9 @@ i915_translate_instructions(struct i915_fp_compile *p, break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX no-op? */ - assert(0); + /* This is a no-op. We'll get immediates from the usual constant/ + * uniform buffer. + */ break; case TGSI_TOKEN_TYPE_INSTRUCTION: diff --git a/src/mesa/pipe/i915simple/i915_texture.c b/src/mesa/pipe/i915simple/i915_texture.c index fefd105adf..44f72e63cc 100644 --- a/src/mesa/pipe/i915simple/i915_texture.c +++ b/src/mesa/pipe/i915simple/i915_texture.c @@ -47,10 +47,6 @@ static unsigned minify( unsigned d ) return MAX2(1, d>>1); } -static int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} static void diff --git a/src/mesa/pipe/i965simple/Makefile b/src/mesa/pipe/i965simple/Makefile index eda5afaea5..48c00ab50b 100644 --- a/src/mesa/pipe/i965simple/Makefile +++ b/src/mesa/pipe/i965simple/Makefile @@ -31,6 +31,7 @@ DRIVER_SOURCES = \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ + brw_shader_info.c \ brw_state.c \ brw_state_batch.c \ brw_state_cache.c \ @@ -40,12 +41,11 @@ DRIVER_SOURCES = \ brw_urb.c \ brw_util.c \ brw_vs.c \ - brw_vs_constval.c \ brw_vs_emit.c \ brw_vs_state.c \ - brw_vtbl.c \ brw_wm.c \ brw_wm_iz.c \ + brw_wm_decl.c \ brw_wm_glsl.c \ brw_wm_sampler_state.c \ brw_wm_state.c \ diff --git a/src/mesa/pipe/i965simple/brw_batch.h b/src/mesa/pipe/i965simple/brw_batch.h index 7c778f360b..bef69ac871 100644 --- a/src/mesa/pipe/i965simple/brw_batch.h +++ b/src/mesa/pipe/i965simple/brw_batch.h @@ -36,7 +36,7 @@ #define INTEL_BATCH_CLIPRECTS 0x2 #define BEGIN_BATCH( dwords, relocs ) \ - (brw->batch_start = brw->winsys->batch_start(brw->winsys, dwords, relocs)) + brw->winsys->batch_start(brw->winsys, dwords, relocs) #define OUT_BATCH( dword ) \ brw->winsys->batch_dword(brw->winsys, dword) @@ -50,7 +50,6 @@ */ #define FLUSH_BATCH(fence) do { \ brw->winsys->batch_flush(brw->winsys, fence); \ - brw->batch_start = NULL; \ brw->hardware_dirty = ~0; \ } while (0) diff --git a/src/mesa/pipe/i965simple/brw_cc.c b/src/mesa/pipe/i965simple/brw_cc.c index fc7fdba53f..6cc1505311 100644 --- a/src/mesa/pipe/i965simple/brw_cc.c +++ b/src/mesa/pipe/i965simple/brw_cc.c @@ -142,7 +142,7 @@ static void upload_cc_vp( struct brw_context *brw ) const struct brw_tracked_state brw_cc_vp = { .dirty = { - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_SCENE, .cache = 0 }, .update = upload_cc_vp diff --git a/src/mesa/pipe/i965simple/brw_clip_state.c b/src/mesa/pipe/i965simple/brw_clip_state.c index 51a4666a0b..ea5c05a279 100644 --- a/src/mesa/pipe/i965simple/brw_clip_state.c +++ b/src/mesa/pipe/i965simple/brw_clip_state.c @@ -32,7 +32,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" - +#include "pipe/p_util.h" static void upload_clip_unit( struct brw_context *brw ) @@ -43,7 +43,7 @@ static void upload_clip_unit( struct brw_context *brw ) /* CACHE_NEW_CLIP_PROG */ clip.thread0.grf_reg_count = - ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1; + align(brw->clip.prog_data->total_grf, 16) / 16 - 1; clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; diff --git a/src/mesa/pipe/i965simple/brw_context.c b/src/mesa/pipe/i965simple/brw_context.c index e69ba6938e..5e58701e91 100644 --- a/src/mesa/pipe/i965simple/brw_context.c +++ b/src/mesa/pipe/i965simple/brw_context.c @@ -237,7 +237,6 @@ struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys, brw->pci_id = pci_id; brw->dirty = ~0; brw->hardware_dirty = ~0; - brw->batch_start = NULL; memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); diff --git a/src/mesa/pipe/i965simple/brw_context.h b/src/mesa/pipe/i965simple/brw_context.h index 53f66cd6a9..318c6a7049 100644 --- a/src/mesa/pipe/i965simple/brw_context.h +++ b/src/mesa/pipe/i965simple/brw_context.h @@ -119,7 +119,6 @@ * Handles blending and (presumably) depth and stencil testing. */ -#define BRW_FALLBACK_TEXTURE 0x1 #define BRW_MAX_CURBE (32*16) struct brw_context; @@ -147,16 +146,13 @@ struct brw_winsys; /* Raised for other internal events: */ #define BRW_NEW_URB_FENCE 0x10000 -#define BRW_NEW_INPUT_DIMENSIONS 0x20000 +#define BRW_NEW_PSP 0x20000 #define BRW_NEW_CURBE_OFFSETS 0x40000 #define BRW_NEW_REDUCED_PRIMITIVE 0x80000 #define BRW_NEW_PRIMITIVE 0x100000 -#define BRW_NEW_CONTEXT 0x200000 -#define BRW_NEW_WM_INPUT_DIMENSIONS 0x400000 -#define BRW_NEW_INPUT_VARYING 0x800000 -#define BRW_NEW_PSP 0x1000000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 -#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) extern int BRW_DEBUG; #define DEBUG_TEXTURE 0x1 @@ -198,23 +194,47 @@ struct brw_state_flags { unsigned brw; }; + +struct brw_shader_info { + int nr_regs[8]; /* TGSI_FILE_* */ +}; + + + struct brw_vertex_program { struct pipe_shader_state program; - unsigned id; - unsigned param_state; /* flags indicating state tracked by params */ + struct brw_shader_info info; + int id; }; struct brw_fragment_program { struct pipe_shader_state program; - unsigned id; - unsigned param_state; /* flags indicating state tracked by params */ + struct brw_shader_info info; + + boolean UsesDepth; boolean UsesKill; boolean ComputesDepth; + int id; }; + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + struct brw_texture { struct pipe_texture base; @@ -248,6 +268,12 @@ struct brw_texture { * corresponding to a different brw_wm_prog_key struct, with different * compiled programs: */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + struct brw_wm_prog_data { unsigned curb_read_length; unsigned urb_read_length; @@ -256,13 +282,14 @@ struct brw_wm_prog_data { unsigned total_grf; unsigned total_scratch; - unsigned nr_params; - boolean error; - - /* Pointer to tracked values (only valid once - * _mesa_load_state_parameters has been called at runtime). + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. */ - const float *param[BRW_MAX_CURBE]; + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; }; struct brw_sf_prog_data { @@ -298,19 +325,14 @@ struct brw_vs_prog_data { unsigned inputs_read; + unsigned max_const; + /* Used for calculating urb partitions: */ unsigned urb_entry_size; }; -/* Size == 0 if output either not written, or always [0,0,0,1] - */ -struct brw_vs_ouput_sizes { - ubyte output_size[PIPE_MAX_SHADER_OUTPUTS]; -}; - - #define BRW_MAX_TEX_UNIT 8 #define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 @@ -374,8 +396,6 @@ struct brw_cache { struct brw_tracked_state { struct brw_state_flags dirty; void (*update)( struct brw_context *brw ); - void (*emit_reloc)( struct brw_context *brw ); - boolean always_update; }; @@ -455,8 +475,6 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - unsigned nr_atoms; } state; @@ -489,34 +507,23 @@ struct brw_context /* Arrays with buffer objects to copy non-bufferobj arrays into * for upload: */ - struct pipe_vertex_buffer vbo_array[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; struct brw_vertex_element inputs[PIPE_ATTRIB_MAX]; #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) - struct { - struct pipe_buffer_handle *vbo[BRW_NR_UPLOAD_BUFS]; - unsigned buf; - unsigned offset; - unsigned size; - unsigned wrap; - } upload; - /* Summary of size and varying of active arrays, so we can check * for changes to this state: */ struct brw_vertex_info info; - int last_vb; } vb; - unsigned *batch_start; unsigned hardware_dirty; unsigned dirty; unsigned pci_id; - /* BRW_NEW_URB_ALLOCATIONS: */ struct { @@ -557,11 +564,6 @@ struct brw_context unsigned vs_size; unsigned total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - unsigned gs_offset; float *last_buf; @@ -595,6 +597,8 @@ struct brw_context struct { struct brw_sf_prog_data *prog_data; + struct pipe_setup_linkage linkage; + unsigned prog_gs_offset; unsigned vp_gs_offset; unsigned state_gs_offset; @@ -602,11 +606,8 @@ struct brw_context struct { struct brw_wm_prog_data *prog_data; - struct brw_wm_compile *compile_data; - /* Input sizes, calculated from active vertex program: - */ - unsigned input_size_masks[4]; +// struct brw_wm_compiler *compile_data; /** @@ -667,8 +668,6 @@ void brw_destroy_state(struct brw_context *brw); * brw_tex.c */ void brwUpdateTextureState( struct brw_context *brw ); -void brw_FrameBufferTexInit( struct brw_context *brw ); -void brw_FrameBufferTexDestroy( struct brw_context *brw ); /* brw_urb.c diff --git a/src/mesa/pipe/i965simple/brw_curbe.c b/src/mesa/pipe/i965simple/brw_curbe.c index 0894e82d56..b943a7af98 100644 --- a/src/mesa/pipe/i965simple/brw_curbe.c +++ b/src/mesa/pipe/i965simple/brw_curbe.c @@ -35,6 +35,9 @@ #include "brw_defines.h" #include "brw_state.h" #include "brw_util.h" +#include "brw_wm.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" #define FILE_DEBUG_FLAG DEBUG_FALLBACKS @@ -43,11 +46,10 @@ static void calculate_curbe_offsets( struct brw_context *brw ) { /* CACHE_NEW_WM_PROG */ - unsigned nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16); /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->attribs.VertexProgram; - unsigned nr_vp_regs = (vp->program.num_inputs * 4 + 15) / 16; + unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16); unsigned nr_clip_regs = 0; unsigned total_regs; @@ -55,7 +57,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) /* BRW_NEW_CLIP ? */ if (brw->attribs.Transform->ClipPlanesEnabled) { unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); - nr_clip_regs = (nr_planes * 4 + 15) / 16; + nr_clip_regs = align(nr_planes * 4, 16); } #endif @@ -172,28 +174,18 @@ static float fixed_plane[6][4] = { { 1, 0, 0, 1 } }; -#if 0 /* Upload a new set of constants. Too much variability to go into the * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ static void upload_constant_buffer(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; unsigned sz = brw->curbe.total_size; unsigned bufsz = sz * 16 * sizeof(float); float *buf; unsigned i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->param_state; - brw->curbe.tracked_state.dirty.mesa |= fp->param_state; if (sz == 0) { struct brw_constant_buffer cb; @@ -220,10 +212,16 @@ static void upload_constant_buffer(struct brw_context *brw) if (brw->curbe.wm_size) { unsigned offset = brw->curbe.wm_start * 16; - _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* First the constant buffer constants: + */ + + /* Then any internally generated constants: + */ + for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) + buf[offset + i] = brw->wm.prog_data->internal_const[i]; - for (i = 0; i < brw->wm.prog_data->nr_params; i++) - buf[offset + i] = brw->wm.prog_data->param[i][0]; + assert(brw->wm.prog_data->max_const == + brw->wm.prog_data->nr_internal_consts); } @@ -243,34 +241,26 @@ static void upload_constant_buffer(struct brw_context *brw) buf[offset + i * 4 + 3] = fixed_plane[i][3]; } - /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to - * clip-space: + /* Clip planes: BRW_NEW_CLIP: */ - assert(MAX_CLIP_PLANES == 6); - for (j = 0; j < MAX_CLIP_PLANES; j++) { - if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) { - buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0]; - buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1]; - buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2]; - buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3]; - i++; - } + for (j = 0; j < brw->attribs.Clip.nr; j++) { + buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; + buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; + buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; + buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; + i++; } } if (brw->curbe.vs_size) { - unsigned offset = brw->curbe.vs_start * 16; - unsigned nr = vp->program.Base.Parameters->NumParameters; +// unsigned offset = brw->curbe.vs_start * 16; +// unsigned nr = vp->max_const; - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* map the vertex constant buffer and copy to curbe: */ - for (i = 0; i < nr; i++) { - buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; - buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; - buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; - buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; - } +// assert(nr == 0); + assert(0); } if (0) { @@ -309,7 +299,12 @@ static void upload_constant_buffer(struct brw_context *brw) /* Copy data to the buffer: */ - dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf); + brw->winsys->buffer_subdata_typed(brw->winsys, + pool->buffer, + brw->curbe.gs_offset, + bufsz, + buf, + BRW_CONSTANT_BUFFER ); } /* TODO: only emit the constant_buffer packet when necessary, ie: @@ -341,9 +336,7 @@ static void upload_constant_buffer(struct brw_context *brw) * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ -/* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */ BRW_BATCH_STRUCT(brw, &cb); -/* intel_batchbuffer_align(brw->intel.batch, 64, 0); */ } } @@ -355,9 +348,8 @@ static void upload_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VERTEX_PROGRAM | + .brw = (BRW_NEW_CLIP | + BRW_NEW_CONSTANTS | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ BRW_NEW_CURBE_OFFSETS), @@ -366,4 +358,3 @@ const struct brw_tracked_state brw_constant_buffer = { .update = upload_constant_buffer }; -#endif diff --git a/src/mesa/pipe/i965simple/brw_draw.c b/src/mesa/pipe/i965simple/brw_draw.c index f443f41c6f..01c8ddb227 100644 --- a/src/mesa/pipe/i965simple/brw_draw.c +++ b/src/mesa/pipe/i965simple/brw_draw.c @@ -151,7 +151,6 @@ static boolean brw_try_draw_elements( struct pipe_context *pipe, unsigned count ) { struct brw_context *brw = brw_context(pipe); - boolean retval = FALSE; /* Set the first primitive ahead of validate_state: */ diff --git a/src/mesa/pipe/i965simple/brw_draw_upload.c b/src/mesa/pipe/i965simple/brw_draw_upload.c index 186a6274fa..79144837e8 100644 --- a/src/mesa/pipe/i965simple/brw_draw_upload.c +++ b/src/mesa/pipe/i965simple/brw_draw_upload.c @@ -207,25 +207,28 @@ static unsigned get_index_type(int type) boolean brw_upload_vertex_buffers( struct brw_context *brw ) { struct brw_array_state vbp; + unsigned nr_enabled = 0; unsigned i; - int nr_enabled = brw->vb.last_vb + 1; memset(&vbp, 0, sizeof(vbp)); /* This is a hardware limit: */ - if (nr_enabled >= BRW_VEP_MAX) - return FALSE; - for (i = 0; i < nr_enabled; i++) + for (i = 0; i < BRW_VEP_MAX; i++) { - vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i].pitch; + if (brw->vb.vbo_array[i]->buffer == NULL) { + nr_enabled = i; + break; + } + + vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch; vbp.vb[i].vb0.bits.pad = 0; vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; vbp.vb[i].vb0.bits.vb_index = i; - vbp.vb[i].offset = brw->vb.vbo_array[i].buffer_offset; - vbp.vb[i].buffer = brw->vb.vbo_array[i].buffer; - vbp.vb[i].max_index = brw->vb.vbo_array[i].max_index; + vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset; + vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer; + vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index; } @@ -260,7 +263,7 @@ boolean brw_upload_vertex_elements( struct brw_context *brw ) for (i = 0; i < nr_enabled; i++) { struct brw_vertex_element *input = &brw->vb.inputs[i]; - switch (brw->vb.vbo_array[input->vep.ve0.vertex_buffer_index].pitch) { + switch (brw->vb.vbo_array[input->vep.ve0.vertex_buffer_index]->pitch) { case 0: input->vep.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0; case 1: input->vep.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; case 2: input->vep.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; diff --git a/src/mesa/pipe/i965simple/brw_eu.h b/src/mesa/pipe/i965simple/brw_eu.h index 111edb1506..23151ae9ed 100644 --- a/src/mesa/pipe/i965simple/brw_eu.h +++ b/src/mesa/pipe/i965simple/brw_eu.h @@ -694,6 +694,17 @@ void brw_init_compile( struct brw_compile *p ); const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ); + +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ); + /* Helpers for regular instructions: */ #define ALU1(OP) \ diff --git a/src/mesa/pipe/i965simple/brw_eu_emit.c b/src/mesa/pipe/i965simple/brw_eu_emit.c index bda63e8b9a..2423536dd1 100644 --- a/src/mesa/pipe/i965simple/brw_eu_emit.c +++ b/src/mesa/pipe/i965simple/brw_eu_emit.c @@ -363,10 +363,10 @@ static struct brw_instruction *next_insn( struct brw_compile *p, } -static struct brw_instruction *brw_alu1( struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src ) +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ) { struct brw_instruction *insn = next_insn(p, opcode); brw_set_dest(insn, dest); @@ -374,11 +374,11 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p, return insn; } -static struct brw_instruction *brw_alu2(struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1 ) +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) { struct brw_instruction *insn = next_insn(p, opcode); brw_set_dest(insn, dest); diff --git a/src/mesa/pipe/i965simple/brw_gs_state.c b/src/mesa/pipe/i965simple/brw_gs_state.c index 8e62eb4bd7..3932e9e939 100644 --- a/src/mesa/pipe/i965simple/brw_gs_state.c +++ b/src/mesa/pipe/i965simple/brw_gs_state.c @@ -34,6 +34,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "pipe/p_util.h" @@ -46,7 +47,7 @@ static void upload_gs_unit( struct brw_context *brw ) /* CACHE_NEW_GS_PROG */ if (brw->gs.prog_active) { gs.thread0.grf_reg_count = - ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1; + align(brw->gs.prog_data->total_grf, 16) / 16 - 1; gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; } diff --git a/src/mesa/pipe/i965simple/brw_misc_state.c b/src/mesa/pipe/i965simple/brw_misc_state.c index 0750502334..e600e9d8de 100644 --- a/src/mesa/pipe/i965simple/brw_misc_state.c +++ b/src/mesa/pipe/i965simple/brw_misc_state.c @@ -202,9 +202,7 @@ static void upload_depthbuffer(struct brw_context *brw) OUT_BATCH(((depth_surface->pitch * depth_surface->cpp) - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | -#if 0 - (depth_surface->region->tiled << 27) | -#endif +// (depth_surface->region->tiled << 27) | (BRW_SURFACE_2D << 29)); OUT_RELOC(depth_surface->buffer, PIPE_BUFFER_FLAG_READ | PIPE_BUFFER_FLAG_WRITE, 0); @@ -317,7 +315,7 @@ static void upload_pipe_control(struct brw_context *brw) const struct brw_tracked_state brw_pipe_control = { .dirty = { - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_SCENE, .cache = 0 }, .update = upload_pipe_control @@ -382,7 +380,7 @@ static void upload_invarient_state( struct brw_context *brw ) const struct brw_tracked_state brw_invarient_state = { .dirty = { - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_SCENE, .cache = 0 }, .update = upload_invarient_state @@ -418,7 +416,7 @@ static void upload_state_base_address( struct brw_context *brw ) const struct brw_tracked_state brw_state_base_address = { .dirty = { - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_SCENE, .cache = 0 }, .update = upload_state_base_address diff --git a/src/mesa/pipe/i965simple/brw_sf.c b/src/mesa/pipe/i965simple/brw_sf.c index f009ff37d9..e7c02beda5 100644 --- a/src/mesa/pipe/i965simple/brw_sf.c +++ b/src/mesa/pipe/i965simple/brw_sf.c @@ -36,9 +36,8 @@ #include "brw_util.h" #include "brw_sf.h" #include "brw_state.h" +#include "tgsi/util/tgsi_parse.h" -#if 0 -#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) @@ -46,7 +45,6 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_compile c; const unsigned *program; unsigned program_size; - unsigned i, idx; memset(&c, 0, sizeof(c)); @@ -55,27 +53,17 @@ static void compile_sf_prog( struct brw_context *brw, brw_init_compile(&c.func); c.key = *key; - c.nr_attrs = brw_count_bits(c.key.attrs); + + + c.nr_attrs = c.key.vp_output_count; c.nr_attr_regs = (c.nr_attrs+1)/2; - c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); + + c.nr_setup_attrs = c.key.fp_input_count; c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; - /* Construct map from attribute number to position in the vertex. - */ - for (i = idx = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<<i)) { - c.attr_to_idx[i] = idx; - c.idx_to_attr[idx] = i; - if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { - c.point_attrs[i].CoordReplace = - brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0]; - } else - c.point_attrs[i].CoordReplace = FALSE; - idx++; - } /* Which primitive? Or all three? */ @@ -90,21 +78,17 @@ static void compile_sf_prog( struct brw_context *brw, break; case SF_POINTS: c.nr_verts = 1; - if (key->do_point_sprite) - brw_emit_point_sprite_setup( &c ); - else - brw_emit_point_setup( &c ); + brw_emit_point_setup( &c ); break; + case SF_UNFILLED_TRIS: - c.nr_verts = 3; - brw_emit_anyprim_setup( &c ); - break; default: assert(0); return; } + /* get the program */ program = brw_get_program(&c.func, &program_size); @@ -142,20 +126,15 @@ static void upload_sf_prog( struct brw_context *brw ) /* Populate the key, noting state dependencies: */ /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->outputs_written; + key.vp_output_count = brw->vs.prog_data->outputs_written; /* BRW_NEW_REDUCED_PRIMITIVE */ switch (brw->reduced_primitive) { case PIPE_PRIM_TRIANGLES: - /* NOTE: We just use the edgeflag attribute as an indicator that - * unfilled triangles are active. We don't actually do the - * edgeflag testing here, it is already done in the clip - * program. - */ - if (key.attrs & (1<<VERT_RESULT_EDGE)) - key.primitive = SF_UNFILLED_TRIS; - else - key.primitive = SF_TRIANGLES; +// if (key.attrs & (1<<VERT_RESULT_EDGE)) +// key.primitive = SF_UNFILLED_TRIS; +// else + key.primitive = SF_TRIANGLES; break; case PIPE_PRIM_LINES: key.primitive = SF_LINES; @@ -165,16 +144,15 @@ static void upload_sf_prog( struct brw_context *brw ) break; } - /* BRW_NEW_POINT */ - key.do_point_sprite = brw->attribs.Point->PointSprite; - key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; - /* BRW_NEW_RASTER */ - key.do_flat_shading = (brw->attribs.Raster->flatshade); - key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); - /* _NEW_POLYGON */ - if (key.do_twoside_color) - key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); +// key.do_point_sprite = brw->attribs.Point->PointSprite; +// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; + +// key.do_flat_shading = (brw->attribs.Raster->flatshade); +// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); + +// if (key.do_twoside_color) +// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); if (!search_cache(brw, &key)) @@ -184,11 +162,150 @@ static void upload_sf_prog( struct brw_context *brw ) const struct brw_tracked_state brw_sf_prog = { .dirty = { - .brw = (BRW_NEW_RASTER | - BRW_NEW_REDUCED_PRIMITIVE), - .cache = CACHE_NEW_VS_PROG + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_REDUCED_PRIMITIVE | + BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, }, .update = upload_sf_prog }; -#endif + +/* Build a struct like the one we'd like the state tracker to pass to + * us. + */ +static void update_sf_linkage( struct brw_context *brw ) +{ + const struct brw_vertex_program *vs = brw->attribs.VertexProgram; + const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; + struct pipe_setup_linkage state; + struct tgsi_parse_context parse; + + int i, j; + int nr_vp_outputs = 0; + int done = 0; + + struct { + unsigned semantic:8; + unsigned semantic_index:16; + } fp_semantic[32], vp_semantic[32]; + + memset(&state, 0, sizeof(state)); + + state.fp_input_count = 0; + + + + + /* First scan fp inputs + */ + tgsi_parse_init( &parse, fs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + + for (i = first; i < last; i++) { + state.fp_input[i].vp_output = ~0; + state.fp_input[i].bf_vp_output = ~0; + state.fp_input[i].interp_mode = + parse.FullToken.FullDeclaration.Interpolation.Interpolate; + + fp_semantic[i].semantic = + parse.FullToken.FullDeclaration.Semantic.SemanticName; + fp_semantic[i].semantic_index = + parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + + } + + assert(last > state.fp_input_count); + state.fp_input_count = last; + } + break; + default: + done = 1; + break; + } + } + + + assert(state.fp_input_count == fs->program.num_inputs); + + + /* Then scan vp outputs + */ + done = 0; + tgsi_parse_init( &parse, vs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + + for (i = first; i < last; i++) { + vp_semantic[i].semantic = + parse.FullToken.FullDeclaration.Semantic.SemanticName; + vp_semantic[i].semantic_index = + parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + } + + assert(last > nr_vp_outputs); + nr_vp_outputs = last; + } + break; + default: + done = 1; + break; + } + } + + + /* Now match based on semantic information. + */ + for (i = 0; i< state.fp_input_count; i++) { + for (j = 0; j < nr_vp_outputs; j++) { + if (fp_semantic[i].semantic == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].vp_output = j; + } + } + if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) { + for (j = 0; j < nr_vp_outputs; j++) { + if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].bf_vp_output = j; + } + } + } + } + + if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) { + brw->sf.linkage = state; + brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE; + } +} + + +const struct brw_tracked_state brw_sf_linkage = { + .dirty = { + .brw = (BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, + }, + .update = update_sf_linkage +}; + diff --git a/src/mesa/pipe/i965simple/brw_sf.h b/src/mesa/pipe/i965simple/brw_sf.h index d04388325d..b7ada47560 100644 --- a/src/mesa/pipe/i965simple/brw_sf.h +++ b/src/mesa/pipe/i965simple/brw_sf.h @@ -42,15 +42,26 @@ #define SF_TRIANGLES 2 #define SF_UNFILLED_TRIS 3 + + struct brw_sf_prog_key { - unsigned attrs:32; + unsigned vp_output_count:5; + unsigned fp_input_count:5; + unsigned primitive:2; unsigned do_twoside_color:1; unsigned do_flat_shading:1; unsigned frontface_ccw:1; unsigned do_point_sprite:1; - unsigned pad:10; - int SpriteOrigin; + + /* Interpolation masks; + */ + unsigned linear_mask; + unsigned persp_mask; + unsigned const_mask; + + +// int SpriteOrigin; }; struct brw_sf_point_tex { diff --git a/src/mesa/pipe/i965simple/brw_sf_emit.c b/src/mesa/pipe/i965simple/brw_sf_emit.c index 93f23171f2..834b5efdfe 100644 --- a/src/mesa/pipe/i965simple/brw_sf_emit.c +++ b/src/mesa/pipe/i965simple/brw_sf_emit.c @@ -36,171 +36,6 @@ #include "brw_util.h" #include "brw_sf.h" -#if 0 -static struct brw_reg get_vert_attr(struct brw_sf_compile *c, - struct brw_reg vert, - unsigned attr) -{ - unsigned off = c->attr_to_idx[attr] / 2; - unsigned sub = c->attr_to_idx[attr] % 2; - - return brw_vec4_grf(vert.nr + off, sub * 4); -} - -static boolean have_attr(struct brw_sf_compile *c, - unsigned attr) -{ - return (c->key.attrs & (1<<attr)) ? 1 : 0; -} - - - -/*********************************************************************** - * Twoside lighting - */ -static void copy_bfc( struct brw_sf_compile *c, - struct brw_reg vert ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - for (i = 0; i < 2; i++) { - if (have_attr(c, VERT_RESULT_COL0+i) && - have_attr(c, VERT_RESULT_BFC0+i)) - brw_MOV(p, - get_vert_attr(c, vert, VERT_RESULT_COL0+i), - get_vert_attr(c, vert, VERT_RESULT_BFC0+i)); - } -} - - -static void do_twoside_color( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *if_insn; - unsigned backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L; - - /* Already done in clip program: - */ - if (c->key.primitive == SF_UNFILLED_TRIS) - return; - - /* XXX: What happens if BFC isn't present? This could only happen - * for user-supplied vertex programs, as t_vp_build.c always does - * the right thing. - */ - if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) && - !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1))) - return; - - /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order - * to get all channels active inside the IF. In the clipping code - * we run with NoMask, so it's not an option and we can use - * BRW_EXECUTE_1 for all comparisions. - */ - brw_push_insn_state(p); - brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0)); - if_insn = brw_IF(p, BRW_EXECUTE_4); - { - switch (c->nr_verts) { - case 3: copy_bfc(c, c->vert[2]); - case 2: copy_bfc(c, c->vert[1]); - case 1: copy_bfc(c, c->vert[0]); - } - } - brw_ENDIF(p, if_insn); - brw_pop_insn_state(p); -} - - - -/*********************************************************************** - * Flat shading - */ - -#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \ - (1<<VERT_RESULT_COL1)) - -static void copy_colors( struct brw_sf_compile *c, - struct brw_reg dst, - struct brw_reg src) -{ - struct brw_compile *p = &c->func; - unsigned i; - - for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) { - if (have_attr(c,i)) - brw_MOV(p, - get_vert_attr(c, dst, i), - get_vert_attr(c, src, i)); - } -} - - - -/* Need to use a computed jump to copy flatshaded attributes as the - * vertices are ordered according to y-coordinate before reaching this - * point, so the PV could be anywhere. - */ -static void do_flatshade_triangle( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg ip = brw_ip_reg(); - unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); - if (!nr) - return; - - /* Already done in clip program: - */ - if (c->key.primitive == SF_UNFILLED_TRIS) - return; - - brw_push_insn_state(p); - - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1)); - brw_JMPI(p, ip, ip, c->pv); - - copy_colors(c, c->vert[1], c->vert[0]); - copy_colors(c, c->vert[2], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1)); - - copy_colors(c, c->vert[0], c->vert[1]); - copy_colors(c, c->vert[2], c->vert[1]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*2)); - - copy_colors(c, c->vert[0], c->vert[2]); - copy_colors(c, c->vert[1], c->vert[2]); - - brw_pop_insn_state(p); -} - - -static void do_flatshade_line( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg ip = brw_ip_reg(); - unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); - - if (!nr) - return; - - /* Already done in clip program: - */ - if (c->key.primitive == SF_UNFILLED_TRIS) - return; - - brw_push_insn_state(p); - - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1)); - brw_JMPI(p, ip, ip, c->pv); - copy_colors(c, c->vert[1], c->vert[0]); - - brw_JMPI(p, ip, ip, brw_imm_ud(nr)); - copy_colors(c, c->vert[0], c->vert[1]); - - brw_pop_insn_state(p); -} - /*********************************************************************** @@ -277,9 +112,6 @@ static void copy_z_inv_w( struct brw_sf_compile *c ) static void invert_det( struct brw_sf_compile *c) { - /* Looks like we invert all 8 elements just to get 1/det in - * position 2 !?! - */ brw_math(&c->func, c->inv_det, BRW_MATH_FUNCTION_INV, @@ -302,22 +134,16 @@ static boolean calculate_masks( struct brw_sf_compile *c, ushort *pc_linear) { boolean is_last_attr = (reg == c->nr_setup_regs - 1); - unsigned persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS; - unsigned linear_mask; - if (c->key.do_flat_shading) - linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); - else - linear_mask = c->key.attrs; *pc_persp = 0; *pc_linear = 0; *pc = 0xf; - if (persp_mask & (1 << c->idx_to_attr[reg*2])) - *pc_persp = 0xf; +// if (persp_mask & (1 << c->idx_to_attr[reg*2])) +// *pc_persp = 0xf; - if (linear_mask & (1 << c->idx_to_attr[reg*2])) +// if (linear_mask & (1 << c->idx_to_attr[reg*2])) *pc_linear = 0xf; /* Maybe only processs one attribute on the final round: @@ -325,10 +151,10 @@ static boolean calculate_masks( struct brw_sf_compile *c, if (reg*2+1 < c->nr_setup_attrs) { *pc |= 0xf0; - if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) - *pc_persp |= 0xf0; +// if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) +// *pc_persp |= 0xf0; - if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) +// if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) *pc_linear |= 0xf0; } @@ -347,12 +173,6 @@ void brw_emit_tri_setup( struct brw_sf_compile *c ) invert_det(c); copy_z_inv_w(c); - if (c->key.do_twoside_color) - do_twoside_color(c); - - if (c->key.do_flat_shading) - do_flatshade_triangle(c); - for (i = 0; i < c->nr_setup_regs; i++) { @@ -433,9 +253,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c ) invert_det(c); copy_z_inv_w(c); - if (c->key.do_flat_shading) - do_flatshade_line(c); - for (i = 0; i < c->nr_setup_regs; i++) { /* Pair of incoming attributes: @@ -491,86 +308,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c ) } } -void brw_emit_point_sprite_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - c->nr_verts = 1; - alloc_regs(c); - copy_z_inv_w(c); - for (i = 0; i < c->nr_setup_regs; i++) - { - struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; - struct brw_reg a0 = offset(c->vert[0], i); - ushort pc, pc_persp, pc_linear; - boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - if (!tex->CoordReplace) { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } - } - - if (tex->CoordReplace) { - /* Caculate 1.0/PointWidth */ - brw_math(&c->func, - c->tmp, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 0, - c->dx0, - BRW_MATH_DATA_SCALAR, - BRW_MATH_PRECISION_FULL); - - if (c->key.SpriteOrigin == GL_UPPER_LEFT) { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } else { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } - } else { - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); - } - - { - brw_set_predicate_control_flag_value(p, pc); - if (tex->CoordReplace) { - if (c->key.SpriteOrigin == GL_UPPER_LEFT) { - brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); - brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); - } - else - brw_MOV(p, c->m3C0, brw_imm_f(0.0)); - } else { - brw_MOV(p, c->m3C0, a0); /* constant value */ - } - - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); - } - } -} /* Points setup - several simplifications as all attributes are * constant across the face of the point (point sprites excluded!) @@ -629,68 +366,3 @@ void brw_emit_point_setup( struct brw_sf_compile *c ) } } } - -void brw_emit_anyprim_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg ip = brw_ip_reg(); - struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); - struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); - struct brw_reg primmask; - struct brw_instruction *jmp; - struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); - - alloc_regs(c); - - primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); - - brw_MOV(p, primmask, brw_imm_ud(1)); - brw_SHL(p, primmask, primmask, payload_prim); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); - brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | - (1<<_3DPRIM_TRISTRIP) | - (1<<_3DPRIM_TRIFAN) | - (1<<_3DPRIM_TRISTRIP_REVERSE) | - (1<<_3DPRIM_POLYGON) | - (1<<_3DPRIM_RECTLIST) | - (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); - { - brw_push_insn_state(p); - brw_emit_tri_setup( c ); - brw_pop_insn_state(p); - /* note - thread killed in subroutine */ - } - brw_land_fwd_jump(p, jmp); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); - brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | - (1<<_3DPRIM_LINESTRIP) | - (1<<_3DPRIM_LINELOOP) | - (1<<_3DPRIM_LINESTRIP_CONT) | - (1<<_3DPRIM_LINESTRIP_BF) | - (1<<_3DPRIM_LINESTRIP_CONT_BF))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); - { - brw_push_insn_state(p); - brw_emit_line_setup( c ); - brw_pop_insn_state(p); - /* note - thread killed in subroutine */ - } - brw_land_fwd_jump(p, jmp); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); - brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); - { - brw_push_insn_state(p); - brw_emit_point_sprite_setup( c ); - brw_pop_insn_state(p); - } - brw_land_fwd_jump(p, jmp); - - brw_emit_point_setup( c ); -} - -#endif diff --git a/src/mesa/pipe/i965simple/brw_sf_state.c b/src/mesa/pipe/i965simple/brw_sf_state.c index 7b6ee215eb..0de6e7240e 100644 --- a/src/mesa/pipe/i965simple/brw_sf_state.c +++ b/src/mesa/pipe/i965simple/brw_sf_state.c @@ -34,91 +34,41 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "pipe/p_util.h" -#if 0 static void upload_sf_vp(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; struct brw_sf_viewport sfv; - struct intel_renderbuffer *irb = - intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0][0]); - float y_scale, y_bias; - int x, y, w, h, x1, x2, y1, y2; - int draw_h = ctx->DrawBuffer->Height; memset(&sfv, 0, sizeof(sfv)); - if (ctx->DrawBuffer->Name) { - /* User-created FBO */ - if (irb && !irb->RenderToTexture) { - y_scale = -1.0; - y_bias = draw_h; - } else { - y_scale = 1.0; - y_bias = 0; - } - } else { - if (brw->intel.driDrawable != NULL) { - y_scale = -1.0; - y_bias = draw_h; - } else { - y_scale = 1.0; - y_bias = 0; - } - } - /* _NEW_VIEWPORT, BRW_NEW_METAOPS */ - - if (!brw->metaops.active) { - const float *v = brw->intel.ctx.Viewport._WindowMap.m; - - sfv.viewport.m00 = v[MAT_SX]; - sfv.viewport.m11 = v[MAT_SY] * y_scale; - sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale; - sfv.viewport.m30 = v[MAT_TX]; - sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; - sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale; - } else { - sfv.viewport.m00 = 1; - sfv.viewport.m11 = - 1; - sfv.viewport.m22 = 1; - sfv.viewport.m30 = 0; - sfv.viewport.m31 = brw->intel.driDrawable->h; - sfv.viewport.m32 = 0; - } + /* BRW_NEW_VIEWPORT */ + { + const float *scale = brw->attribs.Viewport.scale; + const float *trans = brw->attribs.Viewport.translate; - /* _NEW_SCISSOR */ - x = brw->attribs.Scissor->X; - y = brw->attribs.Scissor->Y; - w = brw->attribs.Scissor->Width; - h = brw->attribs.Scissor->Height; - - if (ctx->DrawBuffer->Name == 0) { - x1 = x; - y1 = draw_h - (y + h); - x2 = x + w - 1; - y2 = y1 + h - 1; - } else { - /* FBO has non-inverted coords. */ - x1 = x; - y1 = y; - x2 = x + w - 1; - y2 = y + h - 1; + sfv.viewport.m00 = scale[0]; + sfv.viewport.m11 = scale[1]; + sfv.viewport.m22 = scale[2]; + sfv.viewport.m30 = trans[0]; + sfv.viewport.m31 = trans[1]; + sfv.viewport.m32 = trans[2]; } - sfv.scissor.xmin = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1); - sfv.scissor.xmax = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1); - sfv.scissor.ymin = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1); - sfv.scissor.ymax = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1); + /* _NEW_SCISSOR */ + sfv.scissor.xmin = brw->attribs.Scissor.minx; + sfv.scissor.xmax = brw->attribs.Scissor.maxx; + sfv.scissor.ymin = brw->attribs.Scissor.miny; + sfv.scissor.ymax = brw->attribs.Scissor.maxy; brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); } const struct brw_tracked_state brw_sf_vp = { .dirty = { - .mesa = (_NEW_VIEWPORT | - _NEW_SCISSOR), - .brw = BRW_NEW_METAOPS, + .brw = (BRW_NEW_SCISSOR | + BRW_NEW_VIEWPORT), .cache = 0 }, .update = upload_sf_vp @@ -130,7 +80,7 @@ static void upload_sf_unit( struct brw_context *brw ) memset(&sf, 0, sizeof(sf)); /* CACHE_NEW_SF_PROG */ - sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1; + sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1; sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; @@ -151,19 +101,19 @@ static void upload_sf_unit( struct brw_context *brw ) /* CACHE_NEW_SF_VP */ sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; - sf.sf5.viewport_transform = 1; - /* _NEW_SCISSOR */ - if (brw->attribs.Scissor->Enabled) + /* BRW_NEW_RASTER */ + if (brw->attribs.Raster->scissor) sf.sf6.scissor = 1; - /* _NEW_POLYGON */ +#if 0 if (brw->attribs.Polygon->FrontFace == GL_CCW) sf.sf5.front_winding = BRW_FRONTWINDING_CCW; else sf.sf5.front_winding = BRW_FRONTWINDING_CW; + if (brw->attribs.Polygon->CullFlag) { switch (brw->attribs.Polygon->CullFaceMode) { case GL_FRONT: @@ -182,25 +132,24 @@ static void upload_sf_unit( struct brw_context *brw ) } else sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#endif - - /* _NEW_LINE */ - /* XXX use ctx->Const.Min/MaxLineWidth here */ - sf.sf6.line_width = CLAMP(brw->attribs.Line->Width, 1.0, 5.0) * (1<<1); + sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1); sf.sf6.line_endcap_aa_region_width = 1; - if (brw->attribs.Line->SmoothFlag) + if (brw->attribs.Raster->line_smooth) sf.sf6.aa_enable = 1; else if (sf.sf6.line_width <= 0x2) sf.sf6.line_width = 0; - /* _NEW_POINT */ sf.sf6.point_rast_rule = 1; /* opengl conventions */ - /* XXX clamp max depends on AA vs. non-AA */ - sf.sf7.sprite_point = brw->attribs.Point->PointSprite; - sf.sf7.point_size = CLAMP(brw->attribs.Point->Size, 1.0, 255.0) * (1<<3); - sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated; + sf.sf7.sprite_point = brw->attribs.Raster->point_sprite; + sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3); + sf.sf7.use_point_size_state = brw->attribs.Raster->point_size_per_vertex; /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ @@ -220,12 +169,8 @@ static void upload_sf_unit( struct brw_context *brw ) const struct brw_tracked_state brw_sf_unit = { .dirty = { - .mesa = (_NEW_POLYGON | - _NEW_LINE | - _NEW_POINT | - _NEW_SCISSOR), - .brw = (BRW_NEW_URB_FENCE | - BRW_NEW_METAOPS), + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_URB_FENCE), .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) }, @@ -233,4 +178,3 @@ const struct brw_tracked_state brw_sf_unit = { }; -#endif diff --git a/src/mesa/pipe/i965simple/brw_shader_info.c b/src/mesa/pipe/i965simple/brw_shader_info.c new file mode 100644 index 0000000000..431b45466a --- /dev/null +++ b/src/mesa/pipe/i965simple/brw_shader_info.c @@ -0,0 +1,49 @@ + +#include "brw_context.h" +#include "brw_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + + + + +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ) +{ + struct tgsi_parse_context parse; + int done = 0; + + tgsi_parse_init( &parse, tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned last = decl->u.DeclarationRange.Last; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + // Broken by crazy wpos init: + //assert( info->nr_regs[decl->Declaration.File] <= last); + + info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File], + last+1); + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + +} diff --git a/src/mesa/pipe/i965simple/brw_state.c b/src/mesa/pipe/i965simple/brw_state.c index ff4ae7999b..26450ae597 100644 --- a/src/mesa/pipe/i965simple/brw_state.c +++ b/src/mesa/pipe/i965simple/brw_state.c @@ -198,6 +198,13 @@ static void * brw_create_fs_state(struct pipe_context *pipe, /* XXX: Do I have to duplicate the tokens as well?? */ brw_fp->program = *shader; + brw_fp->id = brw_context(pipe)->program_id++; + + brw_shader_info(shader->tokens, + &brw_fp->info); + + tgsi_dump(shader->tokens, 0); + return (void *)brw_fp; } @@ -228,6 +235,9 @@ static void *brw_create_vs_state(struct pipe_context *pipe, /* XXX: Do I have to duplicate the tokens as well?? */ brw_vp->program = *shader; + brw_vp->id = brw_context(pipe)->program_id++; + brw_shader_info(shader->tokens, + &brw_vp->info); tgsi_dump(shader->tokens, 0); @@ -273,14 +283,11 @@ static void brw_set_viewport_state( struct pipe_context *pipe, static void brw_set_vertex_buffer( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer ) + unsigned index, + const struct pipe_vertex_buffer *buffer ) { struct brw_context *brw = brw_context(pipe); - brw->vb.vbo_array[index] = *buffer; - if (index > brw->vb.last_vb) - brw->vb.last_vb = index; - assert(brw->vb.last_vb < BRW_VEP_MAX); + brw->vb.vbo_array[index] = buffer; } static void brw_set_vertex_element(struct pipe_context *pipe, diff --git a/src/mesa/pipe/i965simple/brw_state.h b/src/mesa/pipe/i965simple/brw_state.h index 4dabfe8082..d09711f6f0 100644 --- a/src/mesa/pipe/i965simple/brw_state.h +++ b/src/mesa/pipe/i965simple/brw_state.h @@ -154,4 +154,11 @@ void brw_upload_clip_prog(struct brw_context *brw); void brw_upload_blend_constant_color(struct brw_context *brw); void brw_upload_wm_samplers(struct brw_context *brw); +/* brw_shader_info.c + */ + +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ); + + #endif diff --git a/src/mesa/pipe/i965simple/brw_state_cache.c b/src/mesa/pipe/i965simple/brw_state_cache.c index 13e262d2e5..c5738733f4 100644 --- a/src/mesa/pipe/i965simple/brw_state_cache.c +++ b/src/mesa/pipe/i965simple/brw_state_cache.c @@ -178,8 +178,9 @@ unsigned brw_upload_cache( struct brw_cache *cache, if (BRW_DEBUG & DEBUG_STATE) printf("upload %s: %d bytes to pool buffer %p offset %x\n", - cache->name, data_size, - cache->pool->buffer, + cache->name, + data_size, + (void*)cache->pool->buffer, offset); /* Copy data to the buffer: diff --git a/src/mesa/pipe/i965simple/brw_state_pool.c b/src/mesa/pipe/i965simple/brw_state_pool.c index a490049024..78268ed8f2 100644 --- a/src/mesa/pipe/i965simple/brw_state_pool.c +++ b/src/mesa/pipe/i965simple/brw_state_pool.c @@ -43,17 +43,18 @@ */ #include "pipe/p_winsys.h" +#include "pipe/p_util.h" #include "brw_context.h" #include "brw_state.h" boolean brw_pool_alloc( struct brw_mem_pool *pool, unsigned size, - unsigned align, + unsigned alignment, unsigned *offset_return) { - unsigned fixup = ALIGN(pool->offset, align) - pool->offset; + unsigned fixup = align(pool->offset, alignment) - pool->offset; - size = ALIGN(size, 4); + size = align(size, 4); if (pool->offset + fixup + size >= pool->size) { printf("%s failed\n", __FUNCTION__); @@ -114,7 +115,7 @@ void brw_pool_check_wrap( struct brw_context *brw, struct brw_mem_pool *pool ) { if (pool->offset > (pool->size * 3) / 4) { - brw->state.dirty.brw |= BRW_NEW_CONTEXT; + brw->state.dirty.brw |= BRW_NEW_SCENE; } } diff --git a/src/mesa/pipe/i965simple/brw_state_upload.c b/src/mesa/pipe/i965simple/brw_state_upload.c index 1ca7484958..1fb480172d 100644 --- a/src/mesa/pipe/i965simple/brw_state_upload.c +++ b/src/mesa/pipe/i965simple/brw_state_upload.c @@ -97,39 +97,16 @@ const struct brw_tracked_state *atoms[] = void brw_init_state( struct brw_context *brw ) { - unsigned i; - brw_init_pools(brw); brw_init_caches(brw); - brw->state.atoms = MALLOC(sizeof(atoms)); - brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); - memcpy(brw->state.atoms, atoms, sizeof(atoms)); - - /* Patch in a pointer to the dynamic state atom: - */ - for (i = 0; i < brw->state.nr_atoms; i++) - if (brw->state.atoms[i] == NULL) - brw->state.atoms[i] = &brw->curbe.tracked_state; - - memcpy(&brw->curbe.tracked_state, - &brw_constant_buffer, - sizeof(brw_constant_buffer)); - brw->state.dirty.brw = ~0; brw->emit_state_always = 0; - - } void brw_destroy_state( struct brw_context *brw ) { - if (brw->state.atoms) { - FREE(brw->state.atoms); - brw->state.atoms = NULL; - } - brw_destroy_caches(brw); brw_destroy_batch_cache(brw); brw_destroy_pools(brw); @@ -177,7 +154,7 @@ void brw_validate_state( struct brw_context *brw ) state->brw == 0) return; - if (brw->state.dirty.brw & BRW_NEW_CONTEXT) + if (brw->state.dirty.brw & BRW_NEW_SCENE) brw_clear_batch_cache_flush(brw); if (BRW_DEBUG) { @@ -189,21 +166,17 @@ void brw_validate_state( struct brw_context *brw ) memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < brw->state.nr_atoms; i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; assert(atom->dirty.brw || atom->dirty.cache); assert(atom->update); - if (check_state(state, &atom->dirty) || atom->always_update) { + if (check_state(state, &atom->dirty)) { atom->update( brw ); - -/* emit_foo(brw); */ } - if (atom->emit_reloc != NULL) - atom->emit_reloc(brw); accumulate_state(&examined, &atom->dirty); @@ -218,12 +191,10 @@ void brw_validate_state( struct brw_context *brw ) } else { for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; - if (check_state(state, &atom->dirty) || atom->always_update) + if (check_state(state, &atom->dirty)) atom->update( brw ); - if (atom->emit_reloc != NULL) - atom->emit_reloc(brw); } } diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.c b/src/mesa/pipe/i965simple/brw_tex_layout.c index b9514be0c2..7d6e2851b1 100644 --- a/src/mesa/pipe/i965simple/brw_tex_layout.c +++ b/src/mesa/pipe/i965simple/brw_tex_layout.c @@ -149,10 +149,10 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) unsigned mip1_width; if (pt->compressed) { - mip1_width = ALIGN(minify(pt->width[0]), align_w) - + ALIGN(minify(minify(pt->width[0])), align_w); + mip1_width = align(minify(pt->width[0]), align_w) + + align(minify(minify(pt->width[0])), align_w); } else { - mip1_width = ALIGN(minify(pt->width[0]), align_w) + mip1_width = align(minify(pt->width[0]), align_w) + minify(minify(pt->width[0])); } @@ -164,7 +164,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) /* Pitch must be a whole number of dwords, even though we * express it in texels. */ - tex->pitch = ALIGN(tex->pitch * pt->cpp, 4) / pt->cpp; + tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp; tex->total_height = 0; for ( level = pt->first_level ; level <= pt->last_level ; level++ ) { @@ -176,7 +176,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) if (pt->compressed) img_height = MAX2(1, height/4); else - img_height = ALIGN(height, align_h); + img_height = align(height, align_h); /* Because the images are packed better, the final offset @@ -187,7 +187,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) /* Layout_below: step right after second mipmap. */ if (level == pt->first_level + 1) { - x += ALIGN(width, align_w); + x += align(width, align_w); } else { y += img_height; @@ -221,13 +221,13 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture #if 0 if (pt->compressed) { align_w = intel_compressed_alignment(pt->internal_format); - pt->pitch = ALIGN(width, align_w); + pt->pitch = align(width, align_w); pack_y_pitch = (height + 3) / 4; } else #endif { - tex->pitch = ALIGN(pt->width[0] * pt->cpp, 4) / pt->cpp; - pack_y_pitch = ALIGN(pt->height[0], align_h); + tex->pitch = align(pt->width[0] * pt->cpp, 4) / pt->cpp; + pack_y_pitch = align(pt->height[0], align_h); } pack_x_pitch = tex->pitch; @@ -262,8 +262,8 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture if (pt->compressed) { pack_y_pitch = (height + 3) / 4; - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); + if (pack_x_pitch > align(width, align_w)) { + pack_x_pitch = align(width, align_w); pack_x_nr <<= 1; } } else { @@ -275,7 +275,7 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture if (pack_y_pitch > 2) { pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); + pack_y_pitch = align(pack_y_pitch, align_h); } } @@ -305,8 +305,6 @@ brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt) sizeof(struct brw_texture)); if (tex) { - struct brw_context *brw = brw_context(pipe); - memset(&tex->base + 1, 0, sizeof(struct brw_texture) - sizeof(struct pipe_texture)); diff --git a/src/mesa/pipe/i965simple/brw_vs.c b/src/mesa/pipe/i965simple/brw_vs.c index 33c6624214..738c6346d5 100644 --- a/src/mesa/pipe/i965simple/brw_vs.c +++ b/src/mesa/pipe/i965simple/brw_vs.c @@ -97,13 +97,6 @@ static void brw_upload_vs_prog( struct brw_context *brw ) key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL); -#if 0 - /* BRW_NEW_METAOPS - */ - if (brw->metaops.active) - key.know_w_is_one = 1; -#endif - /* Make an early check for the key. */ if (brw_search_cache(&brw->cache[BRW_VS_PROG], @@ -120,9 +113,6 @@ static void brw_upload_vs_prog( struct brw_context *brw ) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { -#if 0 - .brw = BRW_NEW_VS | BRW_NEW_METAOPS, -#endif .brw = BRW_NEW_VS, .cache = 0 }, diff --git a/src/mesa/pipe/i965simple/brw_vs_constval.c b/src/mesa/pipe/i965simple/brw_vs_constval.c deleted file mode 100644 index de43e72c1d..0000000000 --- a/src/mesa/pipe/i965simple/brw_vs_constval.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_vs.h" - -#if 0 -/* Component is active if it may diverge from [0,0,0,1]. Undef values - * are promoted to [0,0,0,1] for the purposes of this analysis. - */ -struct tracker { - boolean twoside; - ubyte active[PROGRAM_OUTPUT+1][128]; - unsigned size_masks[4]; -}; - - -static void set_active_component( struct tracker *t, - unsigned file, - unsigned index, - ubyte active ) -{ - switch (file) { - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: - t->active[file][index] |= active; - - default: - break; - } -} - -static void set_active( struct tracker *t, - struct prog_dst_register dst, - unsigned active ) -{ - set_active_component( t, dst.File, dst.Index, active & dst.WriteMask ); -} - - -static ubyte get_active_component( struct tracker *t, - unsigned file, - unsigned index, - unsigned component, - ubyte swz ) -{ - switch (swz) { - case SWIZZLE_ZERO: - return component < 3 ? 0 : (1<<component); - case SWIZZLE_ONE: - return component == 3 ? 0 : (1<<component); - default: - switch (file) { - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: - return t->active[file][index] & (1<<component); - default: - return 1 << component; - } - } -} - - -static ubyte get_active( struct tracker *t, - struct prog_src_register src ) -{ - unsigned i; - ubyte active = src.NegateBase; /* NOTE! */ - - if (src.RelAddr) - return 0xf; - - for (i = 0; i < 4; i++) - active |= get_active_component(t, src.File, src.Index, i, - GET_SWZ(src.Swizzle, i)); - - return active; -} - -static ubyte get_output_size( struct tracker *t, - unsigned idx ) -{ - ubyte active = t->active[PROGRAM_OUTPUT][idx]; - if (active & (1<<3)) return 4; - if (active & (1<<2)) return 3; - if (active & (1<<1)) return 2; - if (active & (1<<0)) return 1; - return 0; -} - -/* Note the potential copying that occurs in the setup program: - */ -static void calc_sizes( struct tracker *t ) -{ - unsigned i; - - if (t->twoside) { - t->active[PROGRAM_OUTPUT][VERT_RESULT_COL0] |= - t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC0]; - - t->active[PROGRAM_OUTPUT][VERT_RESULT_COL1] |= - t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC1]; - } - - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - switch (get_output_size(t, i)) { - case 4: t->size_masks[4-1] |= 1<<i; - case 3: t->size_masks[3-1] |= 1<<i; - case 2: t->size_masks[2-1] |= 1<<i; - case 1: t->size_masks[1-1] |= 1<<i; - break; - } - } -} - -static ubyte szflag[4+1] = { - 0, - 0x1, - 0x3, - 0x7, - 0xf -}; - -/* Pull a size out of the packed array: - */ -static unsigned get_input_size(struct brw_context *brw, - unsigned attr) -{ - unsigned sizes_dword = brw->vb.info.sizes[attr/16]; - unsigned sizes_bits = (sizes_dword>>((attr%16)*2)) & 0x3; - return sizes_bits + 1; -/* return brw->vb.inputs[attr].glarray->Size; */ -} - -/* Calculate sizes of vertex program outputs. Size is the largest - * component index which might vary from [0,0,0,1] - */ -static void calc_wm_input_sizes( struct brw_context *brw ) -{ - /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = - (struct brw_vertex_program *)brw->vertex_program; - /* BRW_NEW_INPUT_DIMENSIONS */ - struct tracker t; - unsigned insn; - unsigned i; - - memset(&t, 0, sizeof(t)); - - /* _NEW_LIGHT */ - if (brw->attribs.Light->Model.TwoSide) - t.twoside = 1; - - for (i = 0; i < PIPE_ATTRIB_MAX; i++) - if (vp->program.Base.InputsRead & (1<<i)) - set_active_component(&t, PROGRAM_INPUT, i, - szflag[get_input_size(brw, i)]); - - for (insn = 0; insn < vp->program.Base.NumInstructions; insn++) { - struct prog_instruction *inst = &vp->program.Base.Instructions[insn]; - - switch (inst->Opcode) { - case OPCODE_ARL: - break; - - case OPCODE_MOV: - set_active(&t, inst->DstReg, get_active(&t, inst->SrcReg[0])); - break; - - default: - set_active(&t, inst->DstReg, 0xf); - break; - } - } - - calc_sizes(&t); - - if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) != 0) { - memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)); - brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS; - } -} - -const struct brw_tracked_state brw_wm_input_sizes = { - .dirty = { - .mesa = _NEW_LIGHT, - .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, - .cache = 0 - }, - .update = calc_wm_input_sizes -}; -#endif diff --git a/src/mesa/pipe/i965simple/brw_vs_emit.c b/src/mesa/pipe/i965simple/brw_vs_emit.c index 59459d4200..530e17a736 100644 --- a/src/mesa/pipe/i965simple/brw_vs_emit.c +++ b/src/mesa/pipe/i965simple/brw_vs_emit.c @@ -103,28 +103,26 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c, c->first_output = reg; mrf = 4; for (i = 0; i < c->vp->program.num_outputs; i++) { - if (c->prog_data.outputs_written & (1<<i)) { - c->nr_outputs++; + c->nr_outputs++; #if 0 - if (i == VERT_RESULT_HPOS) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } - else if (i == VERT_RESULT_PSIZ) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ - } - else { - c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); - mrf++; - } -#else - /* for now stuff everything in grf */ + if (i == VERT_RESULT_HPOS) { c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; -#endif } + else if (i == VERT_RESULT_PSIZ) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + mrf++; /* just a placeholder? XXX fix later stages & remove this */ + } + else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } +#else + /* for now stuff everything in grf */ + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; +#endif } /* Allocate program temporaries: @@ -627,11 +625,9 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, case TGSI_FILE_TEMPORARY: case TGSI_FILE_INPUT: case TGSI_FILE_OUTPUT: + case TGSI_FILE_CONSTANT: assert(c->regs[file][index].nr != 0); return c->regs[file][index]; - case TGSI_FILE_CONSTANT: - assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); - return c->regs[TGSI_FILE_CONSTANT][index]; case TGSI_FILE_ADDRESS: assert(index == 0); return c->regs[file][index]; diff --git a/src/mesa/pipe/i965simple/brw_vs_state.c b/src/mesa/pipe/i965simple/brw_vs_state.c index 7d6fb383b9..c73469929c 100644 --- a/src/mesa/pipe/i965simple/brw_vs_state.c +++ b/src/mesa/pipe/i965simple/brw_vs_state.c @@ -44,7 +44,7 @@ static void upload_vs_unit( struct brw_context *brw ) /* CACHE_NEW_VS_PROG */ vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; - vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; vs.thread3.dispatch_grf_start_reg = 1; diff --git a/src/mesa/pipe/i965simple/brw_vtbl.c b/src/mesa/pipe/i965simple/brw_vtbl.c deleted file mode 100644 index 6dc3bd838b..0000000000 --- a/src/mesa/pipe/i965simple/brw_vtbl.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" - -#include "brw_draw.h" -#include "brw_state.h" -#include "brw_vs.h" -#include <stdarg.h> - -#if 0 -/* called from intelDestroyContext() - */ -static void brw_destroy_context( struct intel_context *intel ) -{ - GLcontext *ctx = &intel->ctx; - struct brw_context *brw = brw_context(&intel->ctx); - - brw_destroy_metaops(brw); - brw_destroy_state(brw); - brw_draw_destroy( brw ); - - brw_ProgramCacheDestroy( ctx ); - brw_FrameBufferTexDestroy( brw ); -} - -/* called from intelDrawBuffer() - */ -static void brw_set_draw_region( struct intel_context *intel, - struct intel_region *draw_region, - struct intel_region *depth_region) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - intel_region_release(&brw->state.draw_region); - intel_region_release(&brw->state.depth_region); - intel_region_reference(&brw->state.draw_region, draw_region); - intel_region_reference(&brw->state.depth_region, depth_region); -} - - -/* called from intelFlushBatchLocked - */ -static void brw_lost_hardware( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - /* Note that we effectively lose the context after this. - * - * Setting this flag provokes a state buffer wrap and also flushes - * the hardware caches. - */ - brw->state.dirty.brw |= BRW_NEW_CONTEXT; - - /* Which means there shouldn't be any commands already queued: - */ - assert(intel->batch->ptr == intel->batch->map); - - brw->state.dirty.mesa |= ~0; - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; -} - -static void brw_note_fence( struct intel_context *intel, - unsigned fence ) -{ - brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; -} - -static void brw_note_unlock( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]); - brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]); - - brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK; -} - - -void brw_do_flush( struct brw_context *brw, - unsigned flags ) -{ - struct brw_mi_flush flush; - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - flush.flags = flags; - BRW_BATCH_STRUCT(brw, &flush); -} - - -static void brw_emit_flush( struct intel_context *intel, - unsigned unused ) -{ - brw_do_flush(brw_context(&intel->ctx), - BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); -} - - -/* called from intelWaitForIdle() and intelFlush() - * - * For now, just flush everything. Could be smarter later. - */ -static unsigned brw_flush_cmd( void ) -{ - struct brw_mi_flush flush; - flush.opcode = CMD_MI_FLUSH; - flush.pad = 0; - flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE; - return *(unsigned *)&flush; -} - -static void brw_invalidate_state( struct intel_context *intel, unsigned new_state ) -{ - /* nothing */ -} -#endif diff --git a/src/mesa/pipe/i965simple/brw_winsys.h b/src/mesa/pipe/i965simple/brw_winsys.h index cc0a210a9f..49a12a1c27 100644 --- a/src/mesa/pipe/i965simple/brw_winsys.h +++ b/src/mesa/pipe/i965simple/brw_winsys.h @@ -95,6 +95,8 @@ enum brw_cache_id { BRW_MAX_CACHE }; +#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE + /** * Additional winsys interface for i965simple. * @@ -164,6 +166,13 @@ struct brw_winsys { unsigned data_type); + /* A cheat so we don't have to think about relocations in a couple + * of places yet: + */ + unsigned (*get_buffer_offset)( struct brw_winsys *sws, + struct pipe_buffer_handle *buf, + unsigned flags ); + }; #define BRW_BUFFER_ACCESS_WRITE 0x1 diff --git a/src/mesa/pipe/i965simple/brw_wm.c b/src/mesa/pipe/i965simple/brw_wm.c index 65271f22fd..f0a38d384b 100644 --- a/src/mesa/pipe/i965simple/brw_wm.c +++ b/src/mesa/pipe/i965simple/brw_wm.c @@ -33,153 +33,34 @@ #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" +#include "brw_eu.h" #include "brw_state.h" +#include "pipe/p_util.h" -unsigned brw_wm_nr_args( unsigned opcode ) -{ - switch (opcode) { - - case WM_PIXELXY: - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_FLR: - case TGSI_OPCODE_FRC: - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_COS: - case TGSI_OPCODE_EX2: - case TGSI_OPCODE_LG2: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SIN: - case TGSI_OPCODE_SCS: - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXD: - case TGSI_OPCODE_KIL: - case TGSI_OPCODE_LIT: - case WM_CINTERP: - case WM_WPOSXY: - return 1; - - case TGSI_OPCODE_POW: - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MAX: - case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_XPD: - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - case TGSI_OPCODE_DST: - case WM_LINTERP: - case WM_DELTAXY: - case WM_PIXELW: - return 2; - - case WM_FB_WRITE: - case WM_PINTERP: - case TGSI_OPCODE_MAD: - case TGSI_OPCODE_CMP: - case TGSI_OPCODE_LRP: - return 3; - - default: - return 0; - } -} - - -unsigned brw_wm_is_scalar_result( unsigned opcode ) -{ - switch (opcode) { - case TGSI_OPCODE_COS: - case TGSI_OPCODE_EX2: - case TGSI_OPCODE_LG2: - case TGSI_OPCODE_POW: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SIN: - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - case TGSI_OPCODE_DST: - return 1; - - default: - return 0; - } -} static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) { - struct brw_wm_compile *c; + struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile); const unsigned *program; unsigned program_size; - c = brw->wm.compile_data; - if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; - } else { - memset(c, 0, sizeof(*brw->wm.compile_data)); - } - memcpy(&c->key, key, sizeof(*key)); - + c->key = *key; c->fp = fp; + + c->delta_xy[0] = brw_null_reg(); + c->delta_xy[1] = brw_null_reg(); + c->pixel_xy[0] = brw_null_reg(); + c->pixel_xy[1] = brw_null_reg(); + c->pixel_w = brw_null_reg(); + + fprintf(stderr, "XXXXXXXX FP\n"); -#if 0 - c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - - if (brw_wm_is_glsl(&c->fp->program)) { - brw_wm_glsl_emit(c); - } else - { - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - /* This is where we start emitting gen4 code: - */ - brw_init_compile(&c->func); - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; - } else { - c->prog_data.total_scratch = 0; - } - - /* Emit GEN4 code. - */ - brw_wm_emit(c); - } + brw_wm_glsl_emit(c); + /* get the program */ program = brw_get_program(&c->func, &program_size); @@ -193,7 +74,8 @@ static void do_wm_prog( struct brw_context *brw, program_size, &c->prog_data, &brw->wm.prog_data ); -#endif + + FREE(c); } @@ -206,8 +88,7 @@ static void brw_wm_populate_key( struct brw_context *brw, (struct brw_fragment_program *)brw->attribs.FragmentProgram; unsigned lookup = 0; unsigned line_aa; - unsigned i; - + memset(key, 0, sizeof(*key)); /* Build the index for table lookup @@ -274,14 +155,10 @@ static void brw_wm_populate_key( struct brw_context *brw, #if 0 - /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); -#endif - - /* _NEW_LIGHT */ - key->flat_shade = (brw->attribs.Raster->flatshade); - - /* _NEW_TEXTURE */ + /* BRW_NEW_SAMPLER + * + * Not doing any of this at the moment: + */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct pipe_sampler_state *unit = brw->attribs.Samplers[i]; @@ -291,12 +168,11 @@ static void brw_wm_populate_key( struct brw_context *brw, unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { key->shadowtex_mask |= 1<<i; } -#if 0 if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) key->yuvtex_mask |= 1<<i; -#endif } } +#endif /* Extra info: @@ -329,7 +205,6 @@ static void brw_upload_wm_prog( struct brw_context *brw ) const struct brw_tracked_state brw_wm_prog = { .dirty = { .brw = (BRW_NEW_FS | - BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), .cache = 0 }, diff --git a/src/mesa/pipe/i965simple/brw_wm.h b/src/mesa/pipe/i965simple/brw_wm.h index a394e25da3..a1ac0f504a 100644 --- a/src/mesa/pipe/i965simple/brw_wm.h +++ b/src/mesa/pipe/i965simple/brw_wm.h @@ -60,86 +60,19 @@ struct brw_wm_prog_key { unsigned aa_dest_stencil_reg:3; unsigned dest_depth_reg:3; unsigned nr_depth_regs:3; - unsigned projtex_mask:8; unsigned shadowtex_mask:8; unsigned computes_depth:1; /* could be derived from program string */ unsigned source_depth_to_render_target:1; - unsigned flat_shade:1; unsigned runtime_check_aads_emit:1; - - unsigned yuvtex_mask:8; - unsigned pad1:24; - - unsigned program_string_id:32; -}; - - -/* A bit of a glossary: - * - * brw_wm_value: A computed value or program input. Values are - * constant, they are created once and are never modified. When a - * fragment program register is written or overwritten, new values are - * created fresh, preserving the rule that values are constant. - * - * brw_wm_ref: A reference to a value. Wherever a value used is by an - * instruction or as a program output, that is tracked with an - * instance of this struct. All references to a value occur after it - * is created. After the last reference, a value is dead and can be - * discarded. - * - * brw_wm_grf: Represents a physical hardware register. May be either - * empty or hold a value. Register allocation is the process of - * assigning values to grf registers. This occurs in pass2 and the - * brw_wm_grf struct is not used before that. - * - * Fragment program registers: These are time-varying constructs that - * are hard to reason about and which we translate away in pass0. A - * single fragment program register element (eg. temp[0].x) will be - * translated to one or more brw_wm_value structs, one for each time - * that temp[0].x is written to during the program. - */ - - -/* Used in pass2 to track register allocation. - */ -struct brw_wm_grf { - struct brw_wm_value *value; - unsigned nextuse; -}; + unsigned yuvtex_mask:8; -struct brw_wm_value { - struct brw_reg hw_reg; /* emitted to this reg, may not always be there */ - struct brw_wm_ref *lastuse; - struct brw_wm_grf *resident; - unsigned contributes_to_output:1; - unsigned spill_slot:16; /* if non-zero, spill immediately after calculation */ + unsigned program_string_id; }; -struct brw_wm_ref { - struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */ - struct brw_wm_value *value; - struct brw_wm_ref *prevuse; - unsigned unspill_reg:7; /* unspill to reg */ - unsigned emitted:1; - unsigned insn:24; -}; -struct brw_wm_constref { - const struct brw_wm_ref *ref; - float constval; -}; -struct brw_wm_instruction { - struct brw_wm_value *dst[4]; - struct brw_wm_ref *src[3][4]; - unsigned opcode:8; - unsigned saturate:1; - unsigned writemask:4; - unsigned tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ - unsigned tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ -}; #define PROGRAM_INTERNAL_PARAM #define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ @@ -151,124 +84,59 @@ struct brw_wm_instruction { #define BRW_WM_MAX_CONST 256 #define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS - - -/* New opcodes to track internal operations required for WM unit. - * These are added early so that the registers used can be tracked, - * freed and reused like those of other instructions. - */ -#define WM_PIXELXY (TGSI_OPCODE_LAST) -#define WM_DELTAXY (TGSI_OPCODE_LAST + 1) -#define WM_PIXELW (TGSI_OPCODE_LAST + 2) -#define WM_LINTERP (TGSI_OPCODE_LAST + 3) -#define WM_PINTERP (TGSI_OPCODE_LAST + 4) -#define WM_CINTERP (TGSI_OPCODE_LAST + 5) -#define WM_WPOSXY (TGSI_OPCODE_LAST + 6) -#define WM_FB_WRITE (TGSI_OPCODE_LAST + 7) -#define MAX_WM_OPCODE (TGSI_OPCODE_LAST + 8) - #define PAYLOAD_DEPTH (PIPE_ATTRIB_MAX) +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + struct brw_wm_compile { struct brw_compile func; struct brw_wm_prog_key key; - struct brw_wm_prog_data prog_data; + struct brw_wm_prog_data prog_data; /* result */ struct brw_fragment_program *fp; - float (*env_param)[4]; - - enum { - START, - PASS2_DONE - } state; - - /* Initial pass - translate fp instructions to fp instructions, - * simplifying and adding instructions for interpolation and - * framebuffer writes. - */ - const struct pipe_shader_state *prog_instructions; - unsigned nr_fp_insns; - unsigned fp_temp; - unsigned fp_interp_emitted; - unsigned fp_deriv_emitted; - - struct tgsi_src_register pixel_xy; - struct tgsi_src_register delta_xy; - struct tgsi_src_register pixel_w; - - - struct brw_wm_value vreg[BRW_WM_MAX_VREG]; - unsigned nr_vreg; - - struct brw_wm_value creg[BRW_WM_MAX_PARAM]; - unsigned nr_creg; + unsigned grf_limit; + unsigned max_wm_grf; - struct { - struct brw_wm_value depth[4]; /* includes r0/r1 */ - struct brw_wm_value input_interp[PIPE_ATTRIB_MAX]; - } payload; + struct brw_reg pixel_xy[2]; + struct brw_reg delta_xy[2]; + struct brw_reg pixel_w; - const struct brw_wm_ref *pass0_fp_reg[16][256][4]; - struct brw_wm_ref undef_ref; - struct brw_wm_value undef_value; + struct brw_reg wm_regs[8][32][4]; - struct brw_wm_ref refs[BRW_WM_MAX_REF]; - unsigned nr_refs; + struct brw_reg payload_depth[4]; + struct brw_reg payload_coef[16]; - struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; - unsigned nr_insns; + struct brw_reg emit_mask_reg; - struct brw_wm_constref constref[BRW_WM_MAX_CONST]; - unsigned nr_constrefs; + struct brw_instruction *if_inst[MAX_IFSN]; + int if_insn; - struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; + struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + int loop_insn; - unsigned grf_limit; - unsigned max_wm_grf; - unsigned last_scratch; + struct brw_instruction *inst0; + struct brw_instruction *inst1; - struct { - boolean inited; - struct brw_reg reg; - } wm_regs[16][256][4]; struct brw_reg stack; - struct brw_reg emit_mask_reg; + struct brw_indirect stack_index; + unsigned reg_index; + + unsigned tmp_start; unsigned tmp_index; }; -unsigned brw_wm_nr_args( unsigned opcode ); -unsigned brw_wm_is_scalar_result( unsigned opcode ); - -void brw_wm_pass_fp( struct brw_wm_compile *c ); -void brw_wm_pass0( struct brw_wm_compile *c ); -void brw_wm_pass1( struct brw_wm_compile *c ); -void brw_wm_pass2( struct brw_wm_compile *c ); -void brw_wm_emit( struct brw_wm_compile *c ); - -void brw_wm_print_value( struct brw_wm_compile *c, - struct brw_wm_value *value ); - -void brw_wm_print_ref( struct brw_wm_compile *c, - struct brw_wm_ref *ref ); - -void brw_wm_print_insn( struct brw_wm_compile *c, - struct brw_wm_instruction *inst ); - -void brw_wm_print_program( struct brw_wm_compile *c, - const char *stage ); void brw_wm_lookup_iz( unsigned line_aa, unsigned lookup, struct brw_wm_prog_key *key ); -#if 0 -boolean brw_wm_is_glsl(struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_wm_compile *c); -#endif +void brw_wm_emit_decls(struct brw_wm_compile *c); #endif diff --git a/src/mesa/pipe/i965simple/brw_wm_decl.c b/src/mesa/pipe/i965simple/brw_wm_decl.c new file mode 100644 index 0000000000..392f17fad6 --- /dev/null +++ b/src/mesa/pipe/i965simple/brw_wm_decl.c @@ -0,0 +1,377 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + + +static int is_null( struct brw_reg reg ) +{ + return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.nr == BRW_ARF_NULL); +} + +static void emit_pixel_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_xy[0])) { + + struct brw_compile *p = &c->func; + struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + + c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + brw_ADD(p, + c->pixel_xy[0], + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + + brw_ADD(p, + c->pixel_xy[1], + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } +} + + + + + + +static void emit_delta_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->delta_xy[0])) { + struct brw_compile *p = &c->func; + struct brw_reg r1 = brw_vec1_grf(1, 0); + + emit_pixel_xy(c); + + c->delta_xy[0] = alloc_tmp(c); + c->delta_xy[1] = alloc_tmp(c); + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + brw_ADD(p, + c->delta_xy[0], + retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + + brw_ADD(p, + c->delta_xy[1], + retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + } +} + + + +#if 0 +static void emit_pixel_w( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_w)) { + struct brw_compile *p = &c->func; + + struct brw_reg interp_wpos = c->coef_wpos; + + c->pixel_w = alloc_tmp(c); + + emit_delta_xy(c); + + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); + brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); + + /* Calc w */ + brw_math_16( p, + c->pixel_w, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} +#endif + + +static void emit_cinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_linterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + emit_delta_xy(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + } + } +} + +#if 0 +static void emit_pinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + get_delta_xy(c); + get_pixel_w(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i); + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + brw_MUL(p, dst, dst, c->pixel_w); + } + } +} +#endif + + + +#if 0 +static void emit_wpos( ) +{ + struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); + struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + struct tgsi_full_src_register deltas = get_delta_xy(c); + struct tgsi_full_src_register arg2; + unsigned opcode; + + opcode = WM_LINTERP; + arg2 = src_undef(); + + /* Have to treat wpos.xy specially: + */ + emit_op(c, + WM_WPOSXY, + dst_mask(dst, WRITEMASK_XY), + 0, 0, 0, + get_pixel_xy(c), + src_undef(), + src_undef()); + + dst = dst_mask(dst, WRITEMASK_ZW); + + /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op(c, + WM_LINTERP, + dst, + 0, 0, 0, + interp, + deltas, + arg2); +} +#endif + + + + +/* Perform register allocation: + * + * -- r0??? + * -- passthrough depth regs (and stencil/aa??) + * -- curbe ?? + * -- inputs (coefficients) + * + * Use a totally static register allocation. This will perform poorly + * but is an easy way to get started (again). + */ +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + int nr_curbe_regs = 0; + + /* R0, then some depth related regs: + */ + for (i = 0; i < c->key.nr_depth_regs; i++) { + c->payload_depth[i] = brw_vec8_grf(i*2, 0); + c->reg_index += 2; + } + + + /* Then a copy of our part of the CURBE entry: + */ + { + int nr_constants = c->fp->info.nr_regs[TGSI_FILE_CONSTANT]; + int index = 0; + + c->prog_data.max_const = 4*nr_constants; + for (i = 0; i < nr_constants; i++) { + for (j = 0; j < 4; j++, index++) + c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, + index%8); + } + + nr_curbe_regs = 2*((4*nr_constants+15)/16); + c->reg_index += nr_curbe_regs; + } + + /* Next we receive the plane coefficients for parameter + * interpolation: + */ + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) { + c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + } + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = c->fp->program.num_inputs * 2; + c->prog_data.curb_read_length = nr_curbe_regs; + + /* That's the end of the payload, now we can start allocating registers. + */ + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index++; + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index += 2; + + /* Now allocate room for the interpolated inputs and staging + * registers for the outputs: + */ + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_OUTPUT]; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + /* Beyond this we should only need registers for internal temporaries: + */ + c->tmp_start = c->reg_index; +} + + + + + +/* Need to interpolate fragment program inputs in as a preamble to the + * shader. A more sophisticated compiler would do this on demand, but + * we'll do it up front: + */ +void brw_wm_emit_decls(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + int done = 0; + + prealloc_reg(c); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned first = decl->u.DeclarationRange.First; + unsigned last = decl->u.DeclarationRange.Last; + unsigned mask = decl->Declaration.UsageMask; /* ? */ + unsigned i; + + if (decl->Declaration.File != TGSI_FILE_INPUT) + break; + + assert(decl->Declaration.Interpolate); + + for( i = first; i <= last; i++ ) { + switch (decl->Interpolation.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + emit_cinterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_linterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + //emit_pinterp(c, i, mask); + emit_linterp(c, i, mask); + break; + } + } + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + + release_tmps(c); +} diff --git a/src/mesa/pipe/i965simple/brw_wm_fp.c b/src/mesa/pipe/i965simple/brw_wm_fp.c deleted file mode 100644 index 20e90bc612..0000000000 --- a/src/mesa/pipe/i965simple/brw_wm_fp.c +++ /dev/null @@ -1,1007 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_wm.h" -#include "brw_util.h" - - -#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS - -#define X 0 -#define Y 1 -#define Z 2 -#define W 3 - - -static const char *wm_opcode_strings[] = { - "PIXELXY", - "DELTAXY", - "PIXELW", - "LINTERP", - "PINTERP", - "CINTERP", - "WPOSXY", - "FB_WRITE" -}; - -#if 0 -static const char *wm_file_strings[] = { - "PAYLOAD" -}; -#endif - - -/*********************************************************************** - * Source regs - */ -#if 0 -static struct prog_src_register src_reg(unsigned file, unsigned idx) -{ - struct prog_src_register reg; - reg.File = file; - reg.Index = idx; - reg.Swizzle = SWIZZLE_NOOP; - reg.RelAddr = 0; - reg.NegateBase = 0; - reg.Abs = 0; - reg.NegateAbs = 0; - return reg; -} - -static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) -{ - return src_reg(dst.File, dst.Index); -} - -static struct prog_src_register src_undef( void ) -{ - return src_reg(PROGRAM_UNDEFINED, 0); -} - -static boolean src_is_undef(struct prog_src_register src) -{ - return src.File == PROGRAM_UNDEFINED; -} - -static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) -{ - reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); - return reg; -} - -static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x ) -{ - return src_swizzle(reg, x, x, x, x); -} - - -/*********************************************************************** - * Dest regs - */ - -static struct prog_dst_register dst_reg(unsigned file, unsigned idx) -{ - struct prog_dst_register reg; - reg.File = file; - reg.Index = idx; - reg.WriteMask = WRITEMASK_XYZW; - reg.CondMask = 0; - reg.CondSwizzle = 0; - reg.pad = 0; - reg.CondSrc = 0; - return reg; -} - -static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) -{ - reg.WriteMask &= mask; - return reg; -} - -static struct prog_dst_register dst_undef( void ) -{ - return dst_reg(PROGRAM_UNDEFINED, 0); -} - - - -static struct prog_dst_register get_temp( struct brw_wm_compile *c ) -{ - int bit = ffs( ~c->fp_temp ); - - if (!bit) { - _mesa_printf("%s: out of temporaries\n", __FILE__); - exit(1); - } - - c->fp_temp |= 1<<(bit-1); - return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); -} - - -static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) -{ - c->fp_temp &= ~1<<(temp.Index + 1 - FIRST_INTERNAL_TEMP); -} - - -/*********************************************************************** - * Instructions - */ - -static const struct tgsi_token *get_fp_inst(struct brw_wm_compile *c) -{ - return &c->prog_instructions->tokens[c->nr_fp_insns++]; -} - -static struct prog_instruction *emit_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst0) -{ - struct prog_instruction *inst = get_fp_inst(c); - *inst = *inst0; - inst->Data = (void *)inst0; - return inst; -} - -static struct prog_instruction * emit_op(struct brw_wm_compile *c, - unsigned op, - struct prog_dst_register dest, - unsigned saturate, - unsigned tex_src_unit, - unsigned tex_src_target, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) -{ - struct prog_instruction *inst = get_fp_inst(c); - - memset(inst, 0, sizeof(*inst)); - - inst->Opcode = op; - inst->DstReg = dest; - inst->SaturateMode = saturate; - inst->TexSrcUnit = tex_src_unit; - inst->TexSrcTarget = tex_src_target; - inst->SrcReg[0] = src0; - inst->SrcReg[1] = src1; - inst->SrcReg[2] = src2; - return inst; -} - - - - -/*********************************************************************** - * Special instructions for interpolation and other tasks - */ - -static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) -{ - if (src_is_undef(c->pixel_xy)) { - struct prog_dst_register pixel_xy = get_temp(c); - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - - - /* Emit the out calculations, and hold onto the results. Use - * two instructions as a temporary is required. - */ - /* pixel_xy.xy = PIXELXY payload[0]; - */ - emit_op(c, - WM_PIXELXY, - dst_mask(pixel_xy, WRITEMASK_XY), - 0, 0, 0, - payload_r0_depth, - src_undef(), - src_undef()); - - c->pixel_xy = src_reg_from_dst(pixel_xy); - } - - return c->pixel_xy; -} - -static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) -{ - if (src_is_undef(c->delta_xy)) { - struct prog_dst_register delta_xy = get_temp(c); - struct prog_src_register pixel_xy = get_pixel_xy(c); - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - - /* deltas.xy = DELTAXY pixel_xy, payload[0] - */ - emit_op(c, - WM_DELTAXY, - dst_mask(delta_xy, WRITEMASK_XY), - 0, 0, 0, - pixel_xy, - payload_r0_depth, - src_undef()); - - c->delta_xy = src_reg_from_dst(delta_xy); - } - - return c->delta_xy; -} - -static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) -{ - if (src_is_undef(c->pixel_w)) { - struct prog_dst_register pixel_w = get_temp(c); - struct prog_src_register deltas = get_delta_xy(c); - struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); - - - /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x - */ - emit_op(c, - WM_PIXELW, - dst_mask(pixel_w, WRITEMASK_W), - 0, 0, 0, - interp_wpos, - deltas, - src_undef()); - - - c->pixel_w = src_reg_from_dst(pixel_w); - } - - return c->pixel_w; -} - -static void emit_interp( struct brw_wm_compile *c, - unsigned idx ) -{ - struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - struct prog_src_register deltas = get_delta_xy(c); - struct prog_src_register arg2; - unsigned opcode; - - /* Need to use PINTERP on attributes which have been - * multiplied by 1/W in the SF program, and LINTERP on those - * which have not: - */ - switch (idx) { - case FRAG_ATTRIB_WPOS: - opcode = WM_LINTERP; - arg2 = src_undef(); - - /* Have to treat wpos.xy specially: - */ - emit_op(c, - WM_WPOSXY, - dst_mask(dst, WRITEMASK_XY), - 0, 0, 0, - get_pixel_xy(c), - src_undef(), - src_undef()); - - dst = dst_mask(dst, WRITEMASK_ZW); - - /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw - */ - emit_op(c, - WM_LINTERP, - dst, - 0, 0, 0, - interp, - deltas, - arg2); - break; - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - if (c->key.flat_shade) { - emit_op(c, - WM_CINTERP, - dst, - 0, 0, 0, - interp, - src_undef(), - src_undef()); - } - else { - emit_op(c, - WM_LINTERP, - dst, - 0, 0, 0, - interp, - deltas, - src_undef()); - } - break; - default: - emit_op(c, - WM_PINTERP, - dst, - 0, 0, 0, - interp, - deltas, - get_pixel_w(c)); - break; - } - - c->fp_interp_emitted |= 1<<idx; -} - -static void emit_ddx( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - unsigned idx = inst->SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<<idx; - emit_op(c, - OPCODE_DDX, - inst->DstReg, - 0, 0, 0, - interp, - get_pixel_w(c), - src_undef()); -} - -static void emit_ddy( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - unsigned idx = inst->SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<<idx; - emit_op(c, - OPCODE_DDY, - inst->DstReg, - 0, 0, 0, - interp, - get_pixel_w(c), - src_undef()); -} - -/*********************************************************************** - * Hacks to extend the program parameter and constant lists. - */ - -/* Add the fog parameters to the parameter list of the original - * program, rather than creating a new list. Doesn't really do any - * harm and it's not as if the parameter handling isn't a big hack - * anyway. - */ -static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, - int s0, - int s1, - int s2, - int s3, - int s4) -{ - struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - gl_state_index tokens[STATE_LENGTH]; - unsigned idx; - tokens[0] = s0; - tokens[1] = s1; - tokens[2] = s2; - tokens[3] = s3; - tokens[4] = s4; - - for (idx = 0; idx < paramList->NumParameters; idx++) { - if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR && - memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0) - return src_reg(PROGRAM_STATE_VAR, idx); - } - - idx = _mesa_add_state_reference( paramList, tokens ); - - /* Recalculate state dependency: - */ - c->fp->param_state = paramList->StateFlags; - - return src_reg(PROGRAM_STATE_VAR, idx); -} - - -static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, - float s0, - float s1, - float s2, - float s3) -{ - struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - float values[4]; - unsigned idx; - unsigned swizzle; - - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; - - /* Have to search, otherwise multiple compilations will each grow - * the parameter list. - */ - for (idx = 0; idx < paramList->NumParameters; idx++) { - if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT && - memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0) - - /* XXX: this mimics the mesa bug which puts all constants and - * parameters into the "PROGRAM_STATE_VAR" category: - */ - return src_reg(PROGRAM_STATE_VAR, idx); - } - - idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); - /* XXX what about swizzle? */ - return src_reg(PROGRAM_STATE_VAR, idx); -} - - - -/*********************************************************************** - * Expand various instructions here to simpler forms. - */ -static void precalc_dst( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct prog_src_register src0 = inst->SrcReg[0]; - struct prog_src_register src1 = inst->SrcReg[1]; - struct prog_dst_register dst = inst->DstReg; - - if (dst.WriteMask & WRITEMASK_Y) { - /* dst.y = mul src0.y, src1.y - */ - emit_op(c, - OPCODE_MUL, - dst_mask(dst, WRITEMASK_Y), - inst->SaturateMode, 0, 0, - src0, - src1, - src_undef()); - } - - - if (dst.WriteMask & WRITEMASK_XZ) { - unsigned z = GET_SWZ(src0.Swizzle, Z); - - /* dst.xz = swz src0.1zzz - */ - emit_op(c, - OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XZ), - inst->SaturateMode, 0, 0, - src_swizzle(src0, SWIZZLE_ONE, z, z, z), - src_undef(), - src_undef()); - } - if (dst.WriteMask & WRITEMASK_W) { - /* dst.w = mov src1.w - */ - emit_op(c, - OPCODE_MOV, - dst_mask(dst, WRITEMASK_W), - inst->SaturateMode, 0, 0, - src1, - src_undef(), - src_undef()); - } -} - - -static void precalc_lit( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct prog_src_register src0 = inst->SrcReg[0]; - struct prog_dst_register dst = inst->DstReg; - - if (dst.WriteMask & WRITEMASK_XW) { - /* dst.xw = swz src0.1111 - */ - emit_op(c, - OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XW), - 0, 0, 0, - src_swizzle1(src0, SWIZZLE_ONE), - src_undef(), - src_undef()); - } - - - if (dst.WriteMask & WRITEMASK_YZ) { - emit_op(c, - OPCODE_LIT, - dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, 0, 0, - src0, - src_undef(), - src_undef()); - } -} - -static void precalc_tex( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct prog_src_register coord; - struct prog_dst_register tmpcoord; - - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { - struct prog_instruction *out; - struct prog_dst_register tmp0 = get_temp(c); - struct prog_src_register tmp0src = src_reg_from_dst(tmp0); - struct prog_dst_register tmp1 = get_temp(c); - struct prog_src_register tmp1src = src_reg_from_dst(tmp1); - struct prog_src_register src0 = inst->SrcReg[0]; - - tmpcoord = get_temp(c); - coord = src_reg_from_dst(tmpcoord); - - out = emit_op(c, OPCODE_MOV, - tmpcoord, - 0, 0, 0, - src0, - src_undef(), - src_undef()); - out->SrcReg[0].NegateBase = 0; - out->SrcReg[0].Abs = 1; - - emit_op(c, OPCODE_MAX, - tmp0, - 0, 0, 0, - src_swizzle1(coord, X), - src_swizzle1(coord, Y), - src_undef()); - - emit_op(c, OPCODE_MAX, - tmp1, - 0, 0, 0, - tmp0src, - src_swizzle1(coord, Z), - src_undef()); - - emit_op(c, OPCODE_RCP, - tmp0, - 0, 0, 0, - tmp1src, - src_undef(), - src_undef()); - - emit_op(c, OPCODE_MUL, - tmpcoord, - 0, 0, 0, - src0, - tmp0src, - src_undef()); - - release_temp(c, tmp0); - release_temp(c, tmp1); - } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { - struct prog_src_register scale = - search_or_add_param5( c, - STATE_INTERNAL, - STATE_TEXRECT_SCALE, - inst->TexSrcUnit, - 0,0 ); - - tmpcoord = get_temp(c); - - /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } - */ - emit_op(c, - OPCODE_MUL, - tmpcoord, - 0, 0, 0, - inst->SrcReg[0], - scale, - src_undef()); - - coord = src_reg_from_dst(tmpcoord); - } - else { - coord = inst->SrcReg[0]; - } - - /* Need to emit YUV texture conversions by hand. Probably need to - * do this here - the alternative is in brw_wm_emit.c, but the - * conversion requires allocating a temporary variable which we - * don't have the facility to do that late in the compilation. - */ - if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) { - emit_op(c, - OPCODE_TEX, - inst->DstReg, - inst->SaturateMode, - inst->TexSrcUnit, - inst->TexSrcTarget, - coord, - src_undef(), - src_undef()); - } - else { - /* - CONST C0 = { -.5, -.0625, -.5, 1.164 } - CONST C1 = { 1.596, -0.813, 2.018, -.391 } - UYV = TEX ... - UYV.xyz = ADD UYV, C0 - UYV.y = MUL UYV.y, C0.w - RGB.xyz = MAD UYV.xxz, C1, UYV.y - RGB.y = MAD UYV.z, C1.w, RGB.y - */ - struct prog_dst_register dst = inst->DstReg; - struct prog_src_register src0 = inst->SrcReg[0]; - struct prog_dst_register tmp = get_temp(c); - struct prog_src_register tmpsrc = src_reg_from_dst(tmp); - struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); - struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); - - /* tmp = TEX ... - */ - emit_op(c, - OPCODE_TEX, - tmp, - inst->SaturateMode, - inst->TexSrcUnit, - inst->TexSrcTarget, - src0, - src_undef(), - src_undef()); - - /* tmp.xyz = ADD TMP, C0 - */ - emit_op(c, - OPCODE_ADD, - dst_mask(tmp, WRITEMASK_XYZ), - 0, 0, 0, - tmpsrc, - C0, - src_undef()); - - /* YUV.y = MUL YUV.y, C0.w - */ - emit_op(c, - OPCODE_MUL, - dst_mask(tmp, WRITEMASK_Y), - 0, 0, 0, - tmpsrc, - src_swizzle1(C0, W), - src_undef()); - - /* RGB.xyz = MAD YUV.xxz, C1, YUV.y - */ - emit_op(c, - OPCODE_MAD, - dst_mask(dst, WRITEMASK_XYZ), - 0, 0, 0, - src_swizzle(tmpsrc, X,X,Z,Z), - C1, - src_swizzle1(tmpsrc, Y)); - - /* RGB.y = MAD YUV.z, C1.w, RGB.y - */ - emit_op(c, - OPCODE_MAD, - dst_mask(dst, WRITEMASK_Y), - 0, 0, 0, - src_swizzle1(tmpsrc, Z), - src_swizzle1(C1, W), - src_swizzle1(src_reg_from_dst(dst), Y)); - - release_temp(c, tmp); - } - - if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV) - release_temp(c, tmpcoord); -} - - -static boolean projtex( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct prog_src_register src = inst->SrcReg[0]; - - /* Only try to detect the simplest cases. Could detect (later) - * cases where we are trying to emit code like RCP {1.0}, MUL x, - * {1.0}, and so on. - * - * More complex cases than this typically only arise from - * user-provided fragment programs anyway: - */ - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) - return 0; /* ut2004 gun rendering !?! */ - else if (src.File == PROGRAM_INPUT && - GET_SWZ(src.Swizzle, W) == W && - (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0) - return 0; - else - return 1; -} - - -static void precalc_txp( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct prog_src_register src0 = inst->SrcReg[0]; - - if (projtex(c, inst)) { - struct prog_dst_register tmp = get_temp(c); - struct prog_instruction tmp_inst; - - /* tmp0.w = RCP inst.arg[0][3] - */ - emit_op(c, - OPCODE_RCP, - dst_mask(tmp, WRITEMASK_W), - 0, 0, 0, - src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), - src_undef(), - src_undef()); - - /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww - */ - emit_op(c, - OPCODE_MUL, - dst_mask(tmp, WRITEMASK_XYZ), - 0, 0, 0, - src0, - src_swizzle1(src_reg_from_dst(tmp), W), - src_undef()); - - /* dst = precalc(TEX tmp0) - */ - tmp_inst = *inst; - tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); - precalc_tex(c, &tmp_inst); - - release_temp(c, tmp); - } - else - { - /* dst = precalc(TEX src0) - */ - precalc_tex(c, inst); - } -} - - - - - -/*********************************************************************** - * Add instructions to perform fog blending - */ - -static void fog_blend( struct brw_wm_compile *c, - struct prog_src_register fog_factor ) -{ - struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); - struct prog_src_register fogcolor = search_or_add_param5( c, STATE_FOG_COLOR, 0,0,0,0 ); - - /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */ - - emit_op(c, - OPCODE_LRP, - dst_mask(outcolor, WRITEMASK_XYZ), - 0, 0, 0, - fog_factor, - src_reg_from_dst(outcolor), - fogcolor); -} - - - -/* This one is simple - just take the interpolated fog coordinate and - * use it as the fog blend factor. - */ -static void fog_interpolated( struct brw_wm_compile *c ) -{ - struct prog_src_register fogc = src_reg(PROGRAM_INPUT, FRAG_ATTRIB_FOGC); - - if (!(c->fp_interp_emitted & (1<<FRAG_ATTRIB_FOGC))) - emit_interp(c, FRAG_ATTRIB_FOGC); - - fog_blend( c, src_swizzle1(fogc, GET_SWZ(fogc.Swizzle,X))); -} - -static void emit_fog( struct brw_wm_compile *c ) -{ - if (!c->fp->program.FogOption) - return; - - if (1) - fog_interpolated( c ); - else { - /* TODO: per-pixel fog */ - assert(0); - } -} - -static void emit_fb_write( struct brw_wm_compile *c ) -{ - struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); - - emit_op(c, - WM_FB_WRITE, - dst_mask(dst_undef(),0), - 0, 0, 0, - outcolor, - payload_r0_depth, - outdepth); -} - - - - -/*********************************************************************** - * Emit INTERP instructions ahead of first use of each attrib. - */ - -static void validate_src_regs( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - unsigned nr_args = brw_wm_nr_args( inst->Opcode ); - unsigned i; - - for (i = 0; i < nr_args; i++) { - if (inst->SrcReg[i].File == PROGRAM_INPUT) { - unsigned idx = inst->SrcReg[i].Index; - if (!(c->fp_interp_emitted & (1<<idx))) { - emit_interp(c, idx); - } - } - } -} - - - -static void print_insns( const struct prog_instruction *insn, - unsigned nr ) -{ - unsigned i; - for (i = 0; i < nr; i++, insn++) { - _mesa_printf("%3d: ", i); - if (insn->Opcode < MAX_OPCODE) - _mesa_print_instruction(insn); - else if (insn->Opcode < MAX_WM_OPCODE) { - unsigned idx = insn->Opcode - MAX_OPCODE; - - _mesa_print_alu_instruction(insn, - wm_opcode_strings[idx], - 3); - } - else - _mesa_printf("UNKNOWN\n"); - - } -} -void brw_wm_pass_fp( struct brw_wm_compile *c ) -{ - struct brw_fragment_program *fp = c->fp; - unsigned insn; - if (BRW_DEBUG & DEBUG_WM) { - _mesa_printf("\n\n\npre-fp:\n"); - _mesa_print_program(&fp->program.Base); - _mesa_printf("\n"); - } - - c->pixel_xy = src_undef(); - c->delta_xy = src_undef(); - c->pixel_w = src_undef(); - c->nr_fp_insns = 0; - - /* Emit preamble instructions: - */ - - - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { - const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; - struct prog_instruction *out; - - /* Check for INPUT values, emit INTERP instructions where - * necessary: - */ - validate_src_regs(c, inst); - - - switch (inst->Opcode) { - case OPCODE_SWZ: - out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; - break; - - case OPCODE_ABS: - out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; - out->SrcReg[0].NegateBase = 0; - out->SrcReg[0].Abs = 1; - break; - - case OPCODE_SUB: - out = emit_insn(c, inst); - out->Opcode = OPCODE_ADD; - out->SrcReg[1].NegateBase ^= 0xf; - break; - - case OPCODE_SCS: - out = emit_insn(c, inst); - /* This should probably be done in the parser. - */ - out->DstReg.WriteMask &= WRITEMASK_XY; - break; - - case OPCODE_DST: - precalc_dst(c, inst); - break; - - case OPCODE_LIT: - precalc_lit(c, inst); - break; - - case OPCODE_TXP: - precalc_txp(c, inst); - break; - - case OPCODE_XPD: - out = emit_insn(c, inst); - /* This should probably be done in the parser. - */ - out->DstReg.WriteMask &= WRITEMASK_XYZ; - break; - - case OPCODE_KIL: - out = emit_insn(c, inst); - /* This should probably be done in the parser. - */ - out->DstReg.WriteMask = 0; - break; - case OPCODE_DDX: - emit_ddx(c, inst); - break; - case OPCODE_DDY: - emit_ddy(c, inst); - break; - case OPCODE_END: - emit_fog(c); - emit_fb_write(c); - break; - case OPCODE_PRINT: - break; - - default: - emit_insn(c, inst); - break; - } - } - - if (BRW_DEBUG & DEBUG_WM) { - _mesa_printf("\n\n\npass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); - } -} -#endif diff --git a/src/mesa/pipe/i965simple/brw_wm_glsl.c b/src/mesa/pipe/i965simple/brw_wm_glsl.c index 90e73a605a..d6dfaed826 100644 --- a/src/mesa/pipe/i965simple/brw_wm_glsl.c +++ b/src/mesa/pipe/i965simple/brw_wm_glsl.c @@ -2,753 +2,437 @@ #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" -#if 0 - -/* Only guess, need a flag in gl_fragment_program later */ -boolean brw_wm_is_glsl(struct gl_fragment_program *fp) -{ - int i; - for (i = 0; i < fp->Base.NumInstructions; i++) { - struct prog_instruction *inst = &fp->Base.Instructions[i]; - switch (inst->Opcode) { - case OPCODE_IF: - case OPCODE_INT: - case OPCODE_ENDIF: - case OPCODE_CAL: - case OPCODE_BRK: - case OPCODE_RET: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_BGNLOOP: - return TRUE; - default: - break; - } - } - return FALSE; -} -static void set_reg(struct brw_wm_compile *c, int file, int index, - int component, struct brw_reg reg) -{ - c->wm_regs[file][index][component].reg = reg; - c->wm_regs[file][index][component].inited = TRUE; -} -static int get_scalar_dst_index(struct prog_instruction *inst) +static int get_scalar_dst_index(struct tgsi_full_instruction *inst) { - int i; - for (i = 0; i < 4; i++) - if (inst->DstReg.WriteMask & (1<<i)) - break; - return i; + struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister; + int i; + for (i = 0; i < 4; i++) + if (dst.WriteMask & (1<<i)) + break; + return i; } static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { - struct brw_reg reg; - reg = brw_vec8_grf(c->tmp_index--, 0); - return reg; + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); } static void release_tmps(struct brw_wm_compile *c) { - c->tmp_index = 127; + c->tmp_index = 0; } + static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, unsigned neg, unsigned abs) +get_reg(struct brw_wm_compile *c, int file, int index, int component ) { - struct brw_reg reg; - switch (file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - file = PROGRAM_STATE_VAR; - break; - case PROGRAM_UNDEFINED: - return brw_null_reg(); - default: - break; - } - - if(c->wm_regs[file][index][component].inited) - reg = c->wm_regs[file][index][component].reg; - else - reg = brw_vec8_grf(c->reg_index, 0); - - if(!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; - } - - if (neg & (1<< component)) { - reg = negate(reg); - } - if (abs) - reg = brw_abs(reg); - return reg; + switch (file) { + case TGSI_FILE_NULL: + return brw_null_reg(); + + case TGSI_FILE_SAMPLER: + /* Should never get here: + */ + assert (0); + return brw_null_reg(); + + case TGSI_FILE_IMMEDIATE: + /* These need a different path: + */ + assert(0); + return brw_null_reg(); + + + case TGSI_FILE_CONSTANT: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + return c->wm_regs[file][index][component]; + + default: + assert(0); + return brw_null_reg(); + } } -static void prealloc_reg(struct brw_wm_compile *c) -{ - int i, j; - struct brw_reg reg; - int nr_interp_regs = 0; - unsigned inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; - - for (i = 0; i < 4; i++) { - reg = (i < c->key.nr_depth_regs) - ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); - } - c->reg_index += 2*c->key.nr_depth_regs; - { - int nr_params = c->fp->program.Base.Parameters->NumParameters; - struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - c->prog_data.nr_params = 4*nr_params; - for (i = 0; i < nr_params; i++) { - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index/8, - index%8); - c->prog_data.param[index] = - &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - c->nr_creg = 2*((4*nr_params+15)/16); - c->reg_index += c->nr_creg; - } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (inputs & (1<<i)) { - nr_interp_regs++; - reg = brw_vec8_grf(c->reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - c->reg_index += 2; - - } - } - c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = nr_interp_regs * 2; - c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; -} static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct prog_instruction *inst, int component, int nr) -{ - return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, - 0, 0); -} - -static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct prog_src_register *src, int index, int nr) + struct tgsi_full_instruction *inst, + int component) { - int component = GET_SWZ(src->Swizzle, index); - return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + return get_reg(c, + inst->FullDstRegisters[0].DstRegister.File, + inst->FullDstRegisters[0].DstRegister.Index, + component); } -static void emit_abs( struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (inst->DstReg.WriteMask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); - brw_MOV(p, dst, brw_abs(src)); - } - } - brw_set_saturate(p, 0); -} - -static void emit_int( struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1) ; - src = get_src_reg(c, &inst->SrcReg[0], i, 1); - brw_RNDD(p, dst, src); - } - } - brw_set_saturate(p, 0); -} - -static void emit_mov( struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); - brw_MOV(p, dst, src); - } - } - brw_set_saturate(p, 0); -} - -static void emit_pixel_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); - - struct brw_reg dst0, dst1; - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - /* Calculate pixel centers by adding 1 or 0 to each of the - * micro-tile coordinates passed in r1. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 4), 2, 4, 0), - brw_imm_v(0x10101010)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 5), 2, 4, 0), - brw_imm_v(0x11001100)); - } - -} - -static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) +static int get_swz( struct tgsi_src_register src, int index ) { - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg dst0, dst1, src0, src1; - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst0, - retype(src0, BRW_REGISTER_TYPE_UW), - negate(r1)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst1, - retype(src1, BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); - - } - + switch (index & 3) { + case 0: return src.SwizzleX; + case 1: return src.SwizzleY; + case 2: return src.SwizzleZ; + case 3: return src.SwizzleW; + default: return 0; + } } - -static void fire_fb_write( struct brw_wm_compile *c, - unsigned base_reg, - unsigned nr ) +static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) { - struct brw_compile *p = &c->func; - - /* Pass through control information: - */ - /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ - brw_MOV(p, - brw_message_reg(base_reg + 1), - brw_vec8_grf(1, 0)); - brw_pop_insn_state(p); - } - /* Send framebuffer write message: */ - brw_fb_WRITE(p, - retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), - base_reg, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - 0, /* render surface always 0 */ - nr, - 0, - 1); + switch (index & 3) { + case 0: return src.ExtSwizzleX; + case 1: return src.ExtSwizzleY; + case 2: return src.ExtSwizzleZ; + case 3: return src.ExtSwizzleW; + default: return 0; + } } -static void emit_fb_write(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - int nr = 2; - int channel; - struct brw_reg src0;//, src1, src2, dst; - - /* Reserve a space for AA - may not be needed: - */ - if (c->key.aa_dest_stencil_reg) - nr += 1; - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); - } - fire_fb_write(c, 0, nr); -} - -static void emit_pixel_w( struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - if (mask & WRITEMASK_W) { - struct brw_reg dst, src0, delta0, delta1; - struct brw_reg interp3; - - dst = get_dst_reg(c, inst, 3, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - - interp3 = brw_vec1_grf(src0.nr+1, 4); - /* Calc 1/w - just linterp wpos[3] optimized by putting the - * result straight into a message reg. - */ - brw_LINE(p, brw_null_reg(), interp3, delta0); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); - - /* Calc w */ - brw_math_16( p, dst, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); - } -} - -static void emit_linterp(struct brw_wm_compile *c, - struct prog_instruction *inst) +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + struct tgsi_full_src_register *src, + int index) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0; - - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - unsigned nr = src0.nr; - int i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - brw_LINE(p, brw_null_reg(), interp[i], delta0); - brw_MAC(p, dst, suboffset(interp[i],1), delta1); - } - } + struct brw_reg reg; + int component = index; + int neg = 0; + int abs = 0; + + if (src->SrcRegister.Negate) + neg = 1; + + component = get_swz(src->SrcRegister, component); + + /* Yes, there are multiple negates: + */ + switch (component & 3) { + case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break; + case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break; + case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break; + case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break; + } + + /* And multiple swizzles, fun isn't it: + */ + component = get_ext_swz(src->SrcRegisterExtSwz, component); + + /* Can't handle this, don't know if we need to: + */ + assert(src->SrcRegisterExtSwz.ExtDivide == 0); + + /* Not handling indirect lookups yet: + */ + assert(src->SrcRegister.Indirect == 0); + + /* Don't know what dimension means: + */ + assert(src->SrcRegister.Dimension == 0); + + /* Will never handle any of this stuff: + */ + assert(src->SrcRegisterExtMod.Complement == 0); + assert(src->SrcRegisterExtMod.Bias == 0); + assert(src->SrcRegisterExtMod.Scale2X == 0); + + if (src->SrcRegisterExtMod.Absolute) + abs = 1; + + /* Another negate! This is a post-absolute negate, which we + * can't do. Need to clean the crap out of tgsi somehow. + */ + assert(src->SrcRegisterExtMod.Negate == 0); + + switch( component ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + reg = get_reg(c, + src->SrcRegister.File, + src->SrcRegister.Index, + component ); + + if (neg) + reg = negate(reg); + + if (abs) + reg = brw_abs(reg); + + break; + + /* XXX: this won't really work in the general case, but we know + * that the extended swizzle is only allowed in the SWZ + * instruction (right??), in which case using an immediate + * directly will work. + */ + case TGSI_EXTSWIZZLE_ZERO: + reg = brw_imm_f(0); + break; + + case TGSI_EXTSWIZZLE_ONE: + if (neg && !abs) + reg = brw_imm_f(-1.0); + else + reg = brw_imm_f(1.0); + break; + + default: + assert(0); + break; + } + + + return reg; } -static void emit_cinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_abs( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, src0; - - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - unsigned nr = src0.nr; - int i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - brw_MOV(p, dst, suboffset(interp[i],3)); - } - } + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + int i; + struct brw_compile *p = &c->func; + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, &inst->FullSrcRegisters[0], i); + brw_MOV(p, dst, brw_abs(src)); /* NOTE */ + } + } + brw_set_saturate(p, 0); } -static void emit_pinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0, w; - - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - w = get_src_reg(c, &inst->SrcReg[2], 3, 1); - unsigned nr = src0.nr; - int i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - brw_LINE(p, brw_null_reg(), interp[i], delta0); - brw_MAC(p, dst, suboffset(interp[i],1), - delta1); - brw_MUL(p, dst, dst, w); - } - } -} static void emit_xpd(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - int i; - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - for (i = 0; i < 4; i++) { - unsigned i2 = (i+2)%3; - unsigned i1 = (i+1)%3; - if (mask & (1<<i)) { - struct brw_reg src0, src1, dst; - dst = get_dst_reg(c, inst, i, 1); - src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); - src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); - brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - brw_MAC(p, dst, src0, src1); - brw_set_saturate(p, 0); - } - } - brw_set_saturate(p, 0); + int i; + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + for (i = 0; i < 4; i++) { + unsigned i2 = (i+2)%3; + unsigned i1 = (i+1)%3; + if (mask & (1<<i)) { + struct brw_reg src0, src1, dst; + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, &inst->FullSrcRegisters[0], i2)); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1); + brw_MUL(p, brw_null_reg(), src0, src1); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); } static void emit_dp3(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_reg src0[3], src1[3], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); - } - - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, 0); + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); } static void emit_dp4(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); - } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, brw_null_reg(), src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); } static void emit_dph(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); - } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_ADD(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); } static void emit_math1(struct brw_wm_compile *c, - struct prog_instruction *inst, unsigned func) + struct tgsi_full_instruction *inst, unsigned func) { - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - brw_MOV(p, brw_message_reg(2), src0); - brw_math(p, + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, dst, func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + ((inst->Instruction.Saturate != TGSI_SAT_NONE) + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } -static void emit_rcp(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_INV); -} -static void emit_rsq(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_alu2(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) { - emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_alu2(p, opcode, dst, src0, src1); + } + } + brw_set_saturate(p, 0); } -static void emit_sin(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); -} -static void emit_cos(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_alu1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) { - emit_math1(c, inst, BRW_MATH_FUNCTION_COS); + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + brw_alu1(p, opcode, dst, src0); + } + } + if (inst->Instruction.Saturate != TGSI_SAT_NONE) + brw_set_saturate(p, 0); } -static void emit_ex2(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); -} - -static void emit_lg2(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); -} - -static void emit_add(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_ADD(p, dst, src0, src1); - } - } - brw_set_saturate(p, 0); -} - -static void emit_sub(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_ADD(p, dst, src0, negate(src1)); - } - } - brw_set_saturate(p, 0); -} - -static void emit_mul(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_MUL(p, dst, src0, src1); - } - } - brw_set_saturate(p, 0); -} - -static void emit_frc(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - brw_FRC(p, dst, src0); - } - } - if (inst->SaturateMode != SATURATE_OFF) - brw_set_saturate(p, 0); -} - -static void emit_flr(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - unsigned mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - brw_RNDD(p, dst, src0); - } - } - brw_set_saturate(p, 0); -} static void emit_max(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); } static void emit_min(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); } static void emit_pow(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0); - brw_MOV(p, brw_message_reg(2), src0); - brw_MOV(p, brw_message_reg(3), src1); + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); - brw_math(p, + brw_math(p, dst, BRW_MATH_FUNCTION_POW, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + (inst->Instruction.Saturate != TGSI_SAT_NONE + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, @@ -756,601 +440,636 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg dst, tmp1, tmp2, src0, src1, src2; - int i; - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - - if (src1.nr == dst.nr) { - tmp1 = alloc_tmp(c); - brw_MOV(p, tmp1, src1); - } else - tmp1 = src1; - - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); - if (src2.nr == dst.nr) { - tmp2 = alloc_tmp(c); - brw_MOV(p, tmp2, src2); - } else - tmp2 = src2; - - brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); - brw_MUL(p, brw_null_reg(), dst, tmp2); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0, tmp1); - brw_set_saturate(p, 0); - } - release_tmps(c); - } + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c); + } } static void emit_kil(struct brw_wm_compile *c) { - struct brw_compile *p = &c->func; - struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK - brw_AND(p, depth, c->emit_mask_reg, depth); - brw_pop_insn_state(p); + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); } static void emit_mad(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1, src2; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); - brw_MUL(p, dst, src0, src1); - - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src2); - brw_set_saturate(p, 0); - } - } + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } } static void emit_sop(struct brw_wm_compile *c, - struct prog_instruction *inst, unsigned cond) + struct tgsi_full_instruction *inst, unsigned cond) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1; - int i; - - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_CMP(p, brw_null_reg(), cond, src0, src1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(0.0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(1.0)); - } - } - brw_pop_insn_state(p); + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_pop_insn_state(p); } -static void emit_slt(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_L); -} -static void emit_sle(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_ddx(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - emit_sop(c, inst, BRW_CONDITIONAL_LE); + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + nr = src0.nr; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, interp[i]); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); } -static void emit_sgt(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_ddy(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - emit_sop(c, inst, BRW_CONDITIONAL_G); + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + nr = src0.nr; + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, suboffset(interp[i], 1)); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); } -static void emit_sge(struct brw_wm_compile *c, - struct prog_instruction *inst) +/* TODO + BIAS on SIMD8 not workind yet... +*/ +static void emit_txb(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - emit_sop(c, inst, BRW_CONDITIONAL_GE); -} +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned i; + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); -static void emit_seq(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_EQ); -} +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + } +#else + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); +#endif -static void emit_sne(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_NEQ); + brw_MOV(p, brw_message_reg(5), src[3]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 4, + 4, + 0); +#endif } -static void emit_ddx(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_tex(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - unsigned nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - brw_MOV(p, dst, interp[i]); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned msg_len; + unsigned i, nr; + unsigned emit; + boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0; + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); -static void emit_ddy(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - unsigned nr, i; - - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - nr = src0.nr; - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - brw_MOV(p, dst, suboffset(interp[i], 1)); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } +#else + emit = WRITEMASK_XY; + nr = 2; +#endif -static void emit_wpos_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->DstReg.WriteMask; - struct brw_reg src0[2], dst[2]; - - dst[0] = get_dst_reg(c, inst, 0, 1); - dst[1] = get_dst_reg(c, inst, 1, 1); - - src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); - - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_MOV(p, - dst[0], - retype(src0[0], BRW_REGISTER_TYPE_UW)); - } - - if (mask & WRITEMASK_Y) { - /* TODO -- window_height - Y */ - brw_MOV(p, - dst[1], - retype(src0[1], BRW_REGISTER_TYPE_UW)); - - } + msg_len = 1; + + for (i = 0; i < nr; i++) { + static const unsigned swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); + else + brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); + msg_len += 1; + } + + if (shadow) { + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), src[2]); + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, + 4, + shadow ? 6 : 4, + 0); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +#endif } -/* TODO - BIAS on SIMD8 not workind yet... - */ -static void emit_txb(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - unsigned i; - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - default: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), src[2]); - break; - } - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - inst->TexSrcUnit + 1, /* surface */ - inst->TexSrcUnit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); -} -static void emit_tex(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - unsigned msg_len; - unsigned i, nr; - unsigned emit; - boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0; - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; - nr = 1; - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; - nr = 2; - break; - default: - emit = WRITEMASK_XYZ; - nr = 3; - break; - } - msg_len = 1; - - for (i = 0; i < nr; i++) { - static const unsigned swz[4] = {0,1,2,2}; - if (emit & (1<<i)) - brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); - else - brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); - msg_len += 1; - } - - if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), src[2]); - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - inst->TexSrcUnit + 1, /* surface */ - inst->TexSrcUnit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); - - if (shadow) - brw_MOV(p, dst[3], brw_imm_f(1.0)); -} -static void post_wm_emit( struct brw_wm_compile *c ) + +static void emit_fb_write(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) { - unsigned nr_insns = c->fp->program.Base.NumInstructions; - unsigned insn, target_insn; - struct prog_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->fp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case OPCODE_CAL: - target_insn = inst1->BranchTarget; - inst2 = &c->fp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } - } + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + int base_reg = 0; + + // src0 = output color + // src1 = payload_depth[0] + // src2 = output depth + // dst = ??? + + + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + { + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel]; + + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + } + + + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* render surface always 0 */ + nr, + 0, + 1); + } -static void brw_wm_emit_glsl(struct brw_wm_compile *c) +static void brw_wm_emit_instruction( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst ) { -#define MAX_IFSN 32 -#define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; - struct brw_instruction *inst0, *inst1; - int i, if_insn = 0, loop_insn = 0; - struct brw_compile *p = &c->func; - struct brw_indirect stack_index = brw_indirect(0, 0); - - brw_init_compile(&c->func); - c->reg_index = 0; - prealloc_reg(c); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); - - for (i = 0; i < c->nr_fp_insns; i++) { - struct prog_instruction *inst = &c->prog_instructions[i]; - struct prog_instruction *orig_inst; - - if ((orig_inst = inst->Data) != 0) - orig_inst->Data = current_insn(p); - - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - - switch (inst->Opcode) { - case WM_PIXELXY: - emit_pixel_xy(c, inst); - break; - case WM_DELTAXY: - emit_delta_xy(c, inst); - break; - case WM_PIXELW: - emit_pixel_w(c, inst); - break; - case WM_LINTERP: - emit_linterp(c, inst); - break; - case WM_PINTERP: - emit_pinterp(c, inst); - break; - case WM_CINTERP: - emit_cinterp(c, inst); - break; - case WM_WPOSXY: - emit_wpos_xy(c, inst); - break; - case WM_FB_WRITE: - emit_fb_write(c, inst); - break; - case OPCODE_ABS: - emit_abs(c, inst); - break; - case OPCODE_ADD: - emit_add(c, inst); - break; - case OPCODE_SUB: - emit_sub(c, inst); - break; - case OPCODE_FRC: - emit_frc(c, inst); - break; - case OPCODE_FLR: - emit_flr(c, inst); - break; - case OPCODE_LRP: - emit_lrp(c, inst); - break; - case OPCODE_INT: - emit_int(c, inst); - break; - case OPCODE_MOV: - emit_mov(c, inst); - break; - case OPCODE_DP3: - emit_dp3(c, inst); - break; - case OPCODE_DP4: - emit_dp4(c, inst); - break; - case OPCODE_XPD: - emit_xpd(c, inst); - break; - case OPCODE_DPH: - emit_dph(c, inst); - break; - case OPCODE_RCP: - emit_rcp(c, inst); - break; - case OPCODE_RSQ: - emit_rsq(c, inst); - break; - case OPCODE_SIN: - emit_sin(c, inst); - break; - case OPCODE_COS: - emit_cos(c, inst); - break; - case OPCODE_EX2: - emit_ex2(c, inst); - break; - case OPCODE_LG2: - emit_lg2(c, inst); - break; - case OPCODE_MAX: - emit_max(c, inst); - break; - case OPCODE_MIN: - emit_min(c, inst); - break; - case OPCODE_DDX: - emit_ddx(c, inst); - break; - case OPCODE_DDY: - emit_ddy(c, inst); - break; - case OPCODE_SLT: - emit_slt(c, inst); - break; - case OPCODE_SLE: - emit_sle(c, inst); - break; - case OPCODE_SGT: - emit_sgt(c, inst); - break; - case OPCODE_SGE: - emit_sge(c, inst); - break; - case OPCODE_SEQ: - emit_seq(c, inst); - break; - case OPCODE_SNE: - emit_sne(c, inst); - break; - case OPCODE_MUL: - emit_mul(c, inst); - break; - case OPCODE_POW: - emit_pow(c, inst); - break; - case OPCODE_MAD: - emit_mad(c, inst); - break; - case OPCODE_TEX: - emit_tex(c, inst); - break; - case OPCODE_TXB: - emit_txb(c, inst); - break; - case OPCODE_KIL_NV: - emit_kil(c); - break; - case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); - break; - case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); - break; - case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); - break; - case OPCODE_BGNSUB: - case OPCODE_ENDSUB: - break; - case OPCODE_CAL: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - orig_inst = inst->Data; - orig_inst->Data = &p->store[p->nr_insn]; - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_pop_insn_state(p); - break; - - case OPCODE_RET: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_pop_insn_state(p); - - break; - case OPCODE_BGNLOOP: - loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); - break; - case OPCODE_BRK: - brw_BREAK(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_CONT: - brw_CONT(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_ENDLOOP: - loop_insn--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); - /* patch all the BREAK instructions from - last BEGINLOOP */ - while (inst0 > loop_inst[loop_insn]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { - inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; - inst0->bits3.if_else.pop_count = 0; - } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { - inst0->bits3.if_else.jump_count = inst1 - inst0; - inst0->bits3.if_else.pop_count = 0; - } - } - break; - default: - _mesa_printf("unsupported IR in fragment shader %d\n", - inst->Opcode); - } - if (inst->CondUpdate) - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - else - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - post_wm_emit(c); - for (i = 0; i < c->fp->program.Base.NumInstructions; i++) - c->fp->program.Base.Instructions[i].Data = NULL; + struct brw_compile *p = &c->func; + +#if 0 + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#endif + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + emit_abs(c, inst); + break; + case TGSI_OPCODE_ADD: + emit_alu2(c, inst, BRW_OPCODE_ADD); + break; + case TGSI_OPCODE_SUB: + assert(0); +// emit_alu2(c, inst, BRW_OPCODE_SUB); + break; + case TGSI_OPCODE_FRC: + emit_alu1(c, inst, BRW_OPCODE_FRC); + break; + case TGSI_OPCODE_FLR: + assert(0); +// emit_alu1(c, inst, BRW_OPCODE_FLR); + break; + case TGSI_OPCODE_LRP: + emit_lrp(c, inst); + break; + case TGSI_OPCODE_INT: + emit_alu1(c, inst, BRW_OPCODE_RNDD); + break; + case TGSI_OPCODE_MOV: + emit_alu1(c, inst, BRW_OPCODE_MOV); + break; + case TGSI_OPCODE_DP3: + emit_dp3(c, inst); + break; + case TGSI_OPCODE_DP4: + emit_dp4(c, inst); + break; + case TGSI_OPCODE_XPD: + emit_xpd(c, inst); + break; + case TGSI_OPCODE_DPH: + emit_dph(c, inst); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); + break; + case TGSI_OPCODE_SIN: + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); + break; + case TGSI_OPCODE_COS: + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); + break; + case TGSI_OPCODE_MAX: + emit_max(c, inst); + break; + case TGSI_OPCODE_MIN: + emit_min(c, inst); + break; + case TGSI_OPCODE_DDX: + emit_ddx(c, inst); + break; + case TGSI_OPCODE_DDY: + emit_ddy(c, inst); + break; + case TGSI_OPCODE_SLT: + emit_sop(c, inst, BRW_CONDITIONAL_L); + break; + case TGSI_OPCODE_SLE: + emit_sop(c, inst, BRW_CONDITIONAL_LE); + break; + case TGSI_OPCODE_SGT: + emit_sop(c, inst, BRW_CONDITIONAL_G); + break; + case TGSI_OPCODE_SGE: + emit_sop(c, inst, BRW_CONDITIONAL_GE); + break; + case TGSI_OPCODE_SEQ: + emit_sop(c, inst, BRW_CONDITIONAL_EQ); + break; + case TGSI_OPCODE_SNE: + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); + break; + case TGSI_OPCODE_MUL: + emit_alu2(c, inst, BRW_OPCODE_MUL); + break; + case TGSI_OPCODE_POW: + emit_pow(c, inst); + break; + case TGSI_OPCODE_MAD: + emit_mad(c, inst); + break; + case TGSI_OPCODE_TEX: + emit_tex(c, inst); + break; + case TGSI_OPCODE_TXB: + emit_txb(c, inst); + break; + case TGSI_OPCODE_TEXKILL: + emit_kil(c); + break; + case TGSI_OPCODE_IF: + assert(c->if_insn < MAX_IFSN); + c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(c->if_insn > 0); + brw_ENDIF(p, c->if_inst[--c->if_insn]); + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + case TGSI_OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(4)); +// orig_inst = inst->Data; +// orig_inst->Data = &p->store[p->nr_insn]; + assert(0); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case TGSI_OPCODE_RET: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); + + break; + case TGSI_OPCODE_LOOP: + c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_ENDLOOP: + c->loop_insn--; + c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); + /* patch all the BREAK instructions from + last BEGINLOOP */ + while (c->inst0 > c->loop_inst[c->loop_insn]) { + c->inst0--; + if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1; + c->inst0->bits3.if_else.pop_count = 0; + } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0; + c->inst0->bits3.if_else.pop_count = 0; + } + } + break; + case TGSI_OPCODE_END: + emit_fb_write(c, inst); + break; + + default: + _mesa_printf("unsupported IR in fragment shader %d\n", + inst->Instruction.Opcode); + } +#if 0 + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif } + + + + + void brw_wm_glsl_emit(struct brw_wm_compile *c) { - brw_wm_pass_fp(c); - c->tmp_index = 127; - brw_wm_emit_glsl(c); - c->prog_data.total_grf = c->reg_index; - c->prog_data.total_scratch = 0; -} + struct tgsi_parse_context parse; + struct brw_compile *p = &c->func; + + brw_init_compile(&c->func); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + c->reg_index = 0; + c->if_insn = 0; + c->loop_insn = 0; + c->stack_index = brw_indirect(0,0); + + /* Do static register allocation and parameter interpolation: + */ + brw_wm_emit_decls( c ); + + /* Emit the actual program. All done with very direct translation, + * hopefully we can improve on this shortly... + */ + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* already done */ + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* not handled yet */ + assert(0); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free (&parse); + + /* Fix up call targets: + */ +#if 0 + { + unsigned nr_insns = c->fp->program.Base.NumInstructions; + unsigned insn, target_insn; + struct tgsi_full_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->fp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + target_insn = inst1->BranchTarget; + inst2 = &c->fp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } + } #endif + + c->prog_data.total_grf = c->reg_index; + c->prog_data.total_scratch = 0; +} diff --git a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c index 4ea0dd7db0..fbeea8c809 100644 --- a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c +++ b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c @@ -229,12 +229,12 @@ static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_samp * complicates various things. However, this is still too confusing - * FIXME: simplify all the different new texture state flags. */ -void brw_upload_wm_samplers(struct brw_context *brw) +static void upload_wm_samplers(struct brw_context *brw) { unsigned unit; unsigned sampler_count = 0; - /* _NEW_TEXTURE */ + /* BRW_NEW_SAMPLER */ for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { if (brw->attribs.Samplers[unit]) { /* FIXME: correctly detect enabled ones */ const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; @@ -262,14 +262,11 @@ void brw_upload_wm_samplers(struct brw_context *brw) sizeof(struct brw_sampler_state) * brw->wm.sampler_count); } -#if 0 const struct brw_tracked_state brw_wm_samplers = { .dirty = { - .mesa = _NEW_TEXTURE, - .brw = 0, + .brw = BRW_NEW_SAMPLER, .cache = 0 }, .update = upload_wm_samplers }; -#endif diff --git a/src/mesa/pipe/i965simple/brw_wm_state.c b/src/mesa/pipe/i965simple/brw_wm_state.c index b45fb2f56b..52d2c85423 100644 --- a/src/mesa/pipe/i965simple/brw_wm_state.c +++ b/src/mesa/pipe/i965simple/brw_wm_state.c @@ -34,15 +34,13 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_wm.h" +#include "pipe/p_util.h" /*********************************************************************** * WM unit - fragment programs and rasterization */ - -#if 0 static void upload_wm_unit(struct brw_context *brw ) { - struct intel_context *intel = &brw->intel; struct brw_wm_unit_state wm; unsigned max_threads; unsigned per_thread; @@ -56,7 +54,7 @@ static void upload_wm_unit(struct brw_context *brw ) memset(&wm, 0, sizeof(wm)); /* CACHE_NEW_WM_PROG */ - wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1; + wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1; wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; @@ -64,9 +62,10 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.max_threads = max_threads; - per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024); + per_thread = align(brw->wm.prog_data->total_scratch, 1024); assert(per_thread <= 12 * 1024); +#if 0 if (brw->wm.prog_data->total_scratch) { unsigned total = per_thread * (max_threads + 1); @@ -95,6 +94,7 @@ static void upload_wm_unit(struct brw_context *brw ) * so just fail for now if we hit that path. */ assert(brw->wm.prog_data->total_scratch == 0); +#endif /* CACHE_NEW_SURFACE */ wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; @@ -112,23 +112,20 @@ static void upload_wm_unit(struct brw_context *brw ) /* BRW_NEW_FRAGMENT_PROGRAM */ { - const struct gl_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_program *fp = brw->attribs.FragmentProgram; - if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS)) + if (fp->UsesDepth) wm.wm5.program_uses_depth = 1; /* as far as we can tell */ - if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) + if (fp->ComputesDepth) wm.wm5.program_computes_depth = 1; - /* _NEW_COLOR */ + /* BRW_NEW_ALPHA_TEST */ if (fp->UsesKill || - brw->attribs.Color->AlphaEnabled) + brw->attribs.AlphaTest->enabled) wm.wm5.program_uses_killpixel = 1; - if (brw_wm_is_glsl(fp)) - wm.wm5.enable_8_pix = 1; - else - wm.wm5.enable_16_pix = 1; + wm.wm5.enable_8_pix = 1; } wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ @@ -138,11 +135,11 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.line_aa_region_width = 0; wm.wm5.line_endcap_aa_region_width = 1; - /* _NEW_POLYGONSTIPPLE */ - if (brw->attribs.Polygon->StippleFlag) + /* BRW_NEW_RASTERIZER */ + if (brw->attribs.Raster->poly_stipple_enable) wm.wm5.polygon_stipple = 1; - /* _NEW_POLYGON */ +#if 0 if (brw->attribs.Polygon->OffsetFill) { wm.wm5.depth_offset = 1; /* Something wierd going on with legacy_global_depth_bias, @@ -156,13 +153,13 @@ static void upload_wm_unit(struct brw_context *brw ) */ wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; } +#endif - /* _NEW_LINE */ - if (brw->attribs.Line->StippleFlag) { + if (brw->attribs.Raster->line_stipple_enable) { wm.wm5.line_stipple = 1; } - if (BRW_DEBUG & DEBUG_STATS || intel->stats_wm) + if (BRW_DEBUG & DEBUG_STATS) wm.wm4.stats_enable = 1; brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); @@ -183,14 +180,10 @@ static void upload_wm_unit(struct brw_context *brw ) const struct brw_tracked_state brw_wm_unit = { .dirty = { - .mesa = (_NEW_POLYGON | - _NEW_POLYGONSTIPPLE | - _NEW_LINE | - _NEW_COLOR), - - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_CURBE_OFFSETS | - BRW_NEW_LOCK), + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_ALPHA_TEST | + BRW_NEW_FS | + BRW_NEW_CURBE_OFFSETS), .cache = (CACHE_NEW_SURFACE | CACHE_NEW_WM_PROG | @@ -199,4 +192,3 @@ const struct brw_tracked_state brw_wm_unit = { .update = upload_wm_unit }; -#endif diff --git a/src/mesa/pipe/i965simple/brw_wm_surface_state.c b/src/mesa/pipe/i965simple/brw_wm_surface_state.c index 844cfc54a9..6e68c4c660 100644 --- a/src/mesa/pipe/i965simple/brw_wm_surface_state.c +++ b/src/mesa/pipe/i965simple/brw_wm_surface_state.c @@ -33,8 +33,6 @@ #include "brw_state.h" #include "brw_defines.h" - -#if 0 static unsigned translate_tex_target( int target ) { switch (target) { @@ -75,13 +73,13 @@ static unsigned translate_tex_format( unsigned mesa_format ) assert(0); /* not supported for sampling */ return BRW_SURFACEFORMAT_R8G8B8_UNORM; - case PIPE_FORMAT_U_A8_R8_G8_B8: + case PIPE_FORMAT_B8G8R8A8_UNORM: return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - case PIPE_FORMAT_RGBA8888_REV: + case PIPE_FORMAT_R8G8B8A8_UNORM: return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; - case PIPE_FORMAT_U_R5_G6_B5: + case PIPE_FORMAT_R5G6B5_UNORM: return BRW_SURFACEFORMAT_B5G6R5_UNORM; case PIPE_FORMAT_A1R5G5B5_UNORM: @@ -95,14 +93,15 @@ static unsigned translate_tex_format( unsigned mesa_format ) case PIPE_FORMAT_YCBCR: return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; - +#if 0 case PIPE_FORMAT_RGB_FXT1: case PIPE_FORMAT_RGBA_FXT1: return BRW_SURFACEFORMAT_FXT1; +#endif case PIPE_FORMAT_Z16_UNORM: return BRW_SURFACEFORMAT_I16_UNORM; - +#if 0 case PIPE_FORMAT_RGB_DXT1: return BRW_SURFACEFORMAT_DXT1_RGB; @@ -119,6 +118,7 @@ static unsigned translate_tex_format( unsigned mesa_format ) return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; case PIPE_FORMAT_SRGB_DXT1: return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; +#endif default: assert(0); @@ -126,41 +126,46 @@ static unsigned translate_tex_format( unsigned mesa_format ) } } +static unsigned brw_buffer_offset(struct brw_context *brw, + struct pipe_buffer_handle *buffer) +{ + return brw->winsys->get_buffer_offset(brw->winsys, + buffer, + 0); +} + static -void brw_update_texture_surface( GLcontext *ctx, +void brw_update_texture_surface( struct brw_context *brw, unsigned unit ) { - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + const struct brw_texture *tObj = brw->attribs.Texture[unit]; struct brw_surface_state surf; memset(&surf, 0, sizeof(surf)); surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = translate_tex_target(tObj->Target); - surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat); + surf.ss0.surface_type = translate_tex_target(tObj->base.target); + surf.ss0.surface_format = translate_tex_format(tObj->base.format); /* This is ok for all textures with channel width 8bit or less: */ /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ /* Updated in emit_reloc */ - surf.ss1.base_addr = brw_buffer_offset( intelObj->mt->region->buffer ); + surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer ); - surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; - surf.ss2.width = firstImage->Width - 1; - surf.ss2.height = firstImage->Height - 1; + surf.ss2.mip_count = tObj->base.last_level - tObj->base.first_level; + surf.ss2.width = tObj->base.width[0]; + surf.ss2.height = tObj->base.height[0]; surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */ - surf.ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1; - surf.ss3.depth = firstImage->Depth - 1; + surf.ss3.tiled_surface = 0; /* always zero */ + surf.ss3.pitch = tObj->pitch; + surf.ss3.depth = tObj->base.depth[0]; surf.ss4.min_lod = 0; - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + if (tObj->base.target == PIPE_TEXTURE_CUBE) { surf.ss0.cube_pos_x = 1; surf.ss0.cube_pos_y = 1; surf.ss0.cube_pos_z = 1; @@ -180,13 +185,14 @@ void brw_update_texture_surface( GLcontext *ctx, static void upload_wm_surfaces(struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; unsigned i; { struct brw_surface_state surf; - struct intel_region *region = brw->state.draw_region; + + /* BRW_NEW_FRAMEBUFFER + */ + struct pipe_surface *region = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ memset(&surf, 0, sizeof(surf)); @@ -198,27 +204,27 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss0.surface_type = BRW_SURFACE_2D; - surf.ss1.base_addr = brw_buffer_offset( region->buffer ); + surf.ss1.base_addr = brw_buffer_offset( brw, region->buffer ); - surf.ss2.width = region->pitch - 1; /* XXX: not really! */ - surf.ss2.height = region->height - 1; + surf.ss2.width = region->width; + surf.ss2.height = region->height; surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = region->tiled; - surf.ss3.pitch = (region->pitch * region->cpp) - 1; + surf.ss3.tiled_surface = 0; + surf.ss3.pitch = region->pitch; } else { surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; surf.ss0.surface_type = BRW_SURFACE_NULL; } - /* _NEW_COLOR */ - surf.ss0.color_blend = (!brw->attribs.Color->_LogicOpEnabled && - brw->attribs.Color->BlendEnabled); + /* BRW_NEW_BLEND */ + surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable && + brw->attribs.Blend->blend_enable); - surf.ss0.writedisable_red = !brw->attribs.Color->ColorMask[0]; - surf.ss0.writedisable_green = !brw->attribs.Color->ColorMask[1]; - surf.ss0.writedisable_blue = !brw->attribs.Color->ColorMask[2]; - surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3]; + surf.ss0.writedisable_red = !brw->attribs.BlendColor.color[0]; + surf.ss0.writedisable_green = !brw->attribs.BlendColor.color[1]; + surf.ss0.writedisable_blue = !brw->attribs.BlendColor.color[2]; + surf.ss0.writedisable_alpha = !brw->attribs.BlendColor.color[3]; @@ -230,23 +236,24 @@ static void upload_wm_surfaces(struct brw_context *brw ) for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; + const struct brw_texture *texUnit = brw->attribs.Texture[i]; - /* _NEW_TEXTURE, BRW_NEW_TEXDATA + /* BRW_NEW_TEXTURE */ - if (texUnit->_ReallyEnabled && - intel_finalize_mipmap_tree(intel, i)) { + if (texUnit->base.refcount/*(texUnit->refcount > 0) == really used */) { - brw_update_texture_surface(ctx, i); + brw_update_texture_surface(brw, i); brw->wm.nr_surfaces = i+2; } - else if( texUnit->_ReallyEnabled && +#if 0 + else if( texUnit->refcount && texUnit->_Current == intel->frame_buffer_texobj ) { brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0]; brw->wm.nr_surfaces = i+2; } +#endif else { brw->wm.bind.surf_ss_offset[i+1] = 0; } @@ -293,14 +300,12 @@ static void emit_reloc_wm_surfaces(struct brw_context *brw) } #endif - - const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, - .brw = BRW_NEW_CONTEXT, + .brw = (BRW_NEW_FRAMEBUFFER | + BRW_NEW_BLEND | + BRW_NEW_TEXTURE), .cache = 0 }, .update = upload_wm_surfaces, }; -#endif diff --git a/src/mesa/pipe/p_state.h b/src/mesa/pipe/p_state.h index 4e42838f1d..af65d365bf 100644 --- a/src/mesa/pipe/p_state.h +++ b/src/mesa/pipe/p_state.h @@ -94,6 +94,7 @@ struct pipe_rasterizer_state unsigned line_stipple_factor:8; /**< [1..256] actually */ unsigned line_stipple_pattern:16; unsigned bypass_clipping:1; + unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ float line_width; float point_size; /**< used when no per-vertex size */ @@ -139,6 +140,7 @@ struct pipe_shader_state { const struct tgsi_token *tokens; ubyte num_inputs; ubyte num_outputs; + ubyte input_map[PIPE_MAX_SHADER_INPUTS]; /* XXX this may be temporary */ ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ diff --git a/src/mesa/pipe/p_util.h b/src/mesa/pipe/p_util.h index e6d284d932..46edcf3075 100644 --- a/src/mesa/pipe/p_util.h +++ b/src/mesa/pipe/p_util.h @@ -360,6 +360,11 @@ static INLINE float LOG2(float val) #define CEILF(x) ((float) ceil(x)) #endif +static INLINE int align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + /* Convenient... */ extern void _mesa_printf(const char *str, ...); diff --git a/src/mesa/pipe/softpipe/sp_context.c b/src/mesa/pipe/softpipe/sp_context.c index dcf0444f6e..b62e691e87 100644 --- a/src/mesa/pipe/softpipe/sp_context.c +++ b/src/mesa/pipe/softpipe/sp_context.c @@ -55,8 +55,6 @@ static boolean softpipe_is_format_supported( struct pipe_context *pipe, enum pipe_format format, uint type ) { - struct softpipe_context *softpipe = softpipe_context( pipe ); - switch (type) { case PIPE_TEXTURE: /* softpipe supports all texture formats */ diff --git a/src/mesa/pipe/softpipe/sp_context.h b/src/mesa/pipe/softpipe/sp_context.h index 2c038de5f7..8fd44933f2 100644 --- a/src/mesa/pipe/softpipe/sp_context.h +++ b/src/mesa/pipe/softpipe/sp_context.h @@ -110,8 +110,6 @@ struct softpipe_context { struct vertex_info vertex_info; unsigned attr_mask; unsigned nr_frag_attrs; /**< number of active fragment attribs */ - boolean need_z; /**< produce quad/fragment Z values? */ - boolean need_w; /**< produce quad/fragment W values? */ int psize_slot; #if 0 diff --git a/src/mesa/pipe/softpipe/sp_headers.h b/src/mesa/pipe/softpipe/sp_headers.h index b9f2b2205a..0ae31d8796 100644 --- a/src/mesa/pipe/softpipe/sp_headers.h +++ b/src/mesa/pipe/softpipe/sp_headers.h @@ -73,6 +73,7 @@ struct quad_header { float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ const struct tgsi_interp_coef *coef; + const struct tgsi_interp_coef *posCoef; unsigned nr_attrs; }; diff --git a/src/mesa/pipe/softpipe/sp_prim_setup.c b/src/mesa/pipe/softpipe/sp_prim_setup.c index fc96f92af1..2ccf5e2624 100644 --- a/src/mesa/pipe/softpipe/sp_prim_setup.c +++ b/src/mesa/pipe/softpipe/sp_prim_setup.c @@ -36,10 +36,12 @@ #include "sp_context.h" #include "sp_headers.h" #include "sp_quad.h" +#include "sp_state.h" #include "sp_prim_setup.h" #include "pipe/draw/draw_private.h" #include "pipe/draw/draw_vertex.h" #include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" #define DEBUG_VERTS 0 @@ -80,8 +82,11 @@ struct setup_stage { float oneoverarea; struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef posCoef; /* For Z, W */ struct quad_header quad; + uint firstFpInput; /** Semantic type of first frag input */ + struct { int left[2]; /**< [0] = row0, [1] = row1 */ int right[2]; @@ -365,18 +370,17 @@ static boolean setup_sort_vertices( struct setup_stage *setup, * \param i which component of the slot (0..3) */ static void const_coeff( struct setup_stage *setup, - unsigned slot, - unsigned i ) + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - assert(slot < PIPE_MAX_SHADER_INPUTS); assert(i <= 3); - setup->coef[slot].dadx[i] = 0; - setup->coef[slot].dady[i] = 0; + coef->dadx[i] = 0; + coef->dady[i] = 0; /* need provoking vertex info! */ - setup->coef[slot].a0[i] = setup->vprovoke->data[slot][i]; + coef->a0[i] = setup->vprovoke->data[vertSlot][i]; } @@ -385,19 +389,20 @@ static void const_coeff( struct setup_stage *setup, * for a triangle. */ static void tri_linear_coeff( struct setup_stage *setup, - unsigned slot, - unsigned i) + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - float botda = setup->vmid->data[slot][i] - setup->vmin->data[slot][i]; - float majda = setup->vmax->data[slot][i] - setup->vmin->data[slot][i]; + float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; + float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; float a = setup->ebot.dy * majda - botda * setup->emaj.dy; float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - - assert(slot < PIPE_MAX_SHADER_INPUTS); + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + assert(i <= 3); - setup->coef[slot].dadx[i] = a * setup->oneoverarea; - setup->coef[slot].dady[i] = b * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at @@ -411,9 +416,9 @@ static void tri_linear_coeff( struct setup_stage *setup, * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. */ - setup->coef[slot].a0[i] = (setup->vmin->data[slot][i] - - (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) + - setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f))); + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); /* _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -434,76 +439,144 @@ static void tri_linear_coeff( struct setup_stage *setup, * divide the interpolated value by the interpolated W at that fragment. */ static void tri_persp_coeff( struct setup_stage *setup, - unsigned slot, - unsigned i ) + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - /* premultiply by 1/w: + /* premultiply by 1/w (v->data[0][3] is always W): */ - float mina = setup->vmin->data[slot][i] * setup->vmin->data[0][3]; - float mida = setup->vmid->data[slot][i] * setup->vmid->data[0][3]; - float maxa = setup->vmax->data[slot][i] * setup->vmax->data[0][3]; - + float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; + float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3]; + float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; float botda = mida - mina; float majda = maxa - mina; float a = setup->ebot.dy * majda - botda * setup->emaj.dy; float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; /* - printf("tri persp %d,%d: %f %f %f\n", slot, i, - setup->vmin->data[slot][i], - setup->vmid->data[slot][i], - setup->vmax->data[slot][i] + printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin->data[vertSlot][i], + setup->vmid->data[vertSlot][i], + setup->vmax->data[vertSlot][i] ); */ - - assert(slot < PIPE_MAX_SHADER_INPUTS); assert(i <= 3); - setup->coef[slot].dadx[i] = a * setup->oneoverarea; - setup->coef[slot].dady[i] = b * setup->oneoverarea; - setup->coef[slot].a0[i] = (mina - - (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) + - setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f))); + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (mina - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); } /** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coeff(struct setup_stage *setup) +{ + /*X*/ + setup->coef[0].a0[0] = 0; + setup->coef[0].dadx[0] = 1.0; + setup->coef[0].dady[0] = 0.0; + /*Y*/ + if (setup->softpipe->rasterizer->origin_lower_left) { + /* y=0=bottom */ + const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height; + setup->coef[0].a0[1] = winHeight - 1; + setup->coef[0].dady[1] = -1.0; + } + else { + /* y=0=top */ + setup->coef[0].a0[1] = 0.0; + setup->coef[0].dady[1] = 1.0; + } + setup->coef[0].dadx[1] = 0.0; + /*Z*/ + setup->coef[0].a0[2] = setup->posCoef.a0[2]; + setup->coef[0].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[0].dady[2] = setup->posCoef.dady[2]; + /*w*/ + setup->coef[0].a0[3] = setup->posCoef.a0[3]; + setup->coef[0].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[0].dady[3] = setup->posCoef.dady[3]; +} + + + +/** * Compute the setup->coef[] array dadx, dady, a0 values. * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. */ static void setup_tri_coefficients( struct setup_stage *setup ) { const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode; - unsigned slot, j; +#define USE_INPUT_MAP 0 +#if USE_INPUT_MAP + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; +#endif + uint fragSlot; /* z and w are done by linear interpolation: */ - tri_linear_coeff(setup, 0, 2); - tri_linear_coeff(setup, 0, 3); + tri_linear_coeff(setup, &setup->posCoef, 0, 2); + tri_linear_coeff(setup, &setup->posCoef, 0, 3); /* setup interpolation for all the remaining attributes: */ - for (slot = 1; slot < setup->quad.nr_attrs; slot++) { - switch (interp[slot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, slot, j); - break; - - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - tri_linear_coeff(setup, slot, j); - break; - - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - tri_persp_coeff(setup, slot, j); - break; - - default: - /* invalid interp mode */ - assert(0); + for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) { + /* which vertex output maps to this fragment input: */ +#if !USE_INPUT_MAP + uint vertSlot; + if (setup->firstFpInput == TGSI_SEMANTIC_POSITION) { + if (fragSlot == 0) { + setup_fragcoord_coeff(setup); + continue; + } + vertSlot = fragSlot; + } + else { + vertSlot = fragSlot + 1; } + +#else + uint vertSlot = fs->input_map[fragSlot]; + + if (vertSlot == 0) { + /* special case: shader is reading gl_FragCoord */ + /* XXX with a new INTERP_POSITION token, we could just add a + * new case to the switch below. + */ + setup_fragcoord_coeff(setup); + } + else { +#endif + uint j; + switch (interp[vertSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + default: + /* invalid interp mode */ + assert(0); + } +#if USE_INPUT_MAP + } +#endif } } @@ -660,17 +733,18 @@ static void setup_tri( struct draw_stage *stage, * for a line. */ static void -line_linear_coeff(struct setup_stage *setup, unsigned slot, unsigned i) +line_linear_coeff(struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - const float da = setup->vmax->data[slot][i] - setup->vmin->data[slot][i]; + const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; const float dadx = da * setup->emaj.dx * setup->oneoverarea; const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef[slot].dadx[i] = dadx; - setup->coef[slot].dady[i] = dady; - setup->coef[slot].a0[i] - = (setup->vmin->data[slot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); } @@ -679,21 +753,21 @@ line_linear_coeff(struct setup_stage *setup, unsigned slot, unsigned i) * for a line. */ static void -line_persp_coeff(struct setup_stage *setup, unsigned slot, unsigned i) +line_persp_coeff(struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin->data[slot][i] * setup->vmin->data[0][3]; - const float a1 = setup->vmax->data[slot][i] * setup->vmin->data[0][3]; + const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; + const float a1 = setup->vmax->data[vertSlot][i] * setup->vmin->data[0][3]; const float da = a1 - a0; const float dadx = da * setup->emaj.dx * setup->oneoverarea; const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef[slot].dadx[i] = dadx; - setup->coef[slot].dady[i] = dady; - setup->coef[slot].a0[i] - = (setup->vmin->data[slot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); - + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); } @@ -705,7 +779,8 @@ static INLINE void setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) { const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode; - unsigned slot, j; + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + unsigned fragSlot; /* use setup->vmin, vmax to point to vertices */ setup->vprovoke = prim->v[1]; @@ -720,31 +795,39 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) /* z and w are done by linear interpolation: */ - line_linear_coeff(setup, 0, 2); - line_linear_coeff(setup, 0, 3); + line_linear_coeff(setup, &setup->posCoef, 0, 2); + line_linear_coeff(setup, &setup->posCoef, 0, 3); /* setup interpolation for all the remaining attributes: */ - for (slot = 1; slot < setup->quad.nr_attrs; slot++) { - switch (interp[slot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, slot, j); - break; - - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - line_linear_coeff(setup, slot, j); - break; - - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - line_persp_coeff(setup, slot, j); - break; - - default: - /* invalid interp mode */ - assert(0); + for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) { + /* which vertex output maps to this fragment input: */ + uint vertSlot = fs->input_map[fragSlot]; + + if (vertSlot == 0) { + /* special case: shader is reading gl_FragCoord */ + setup_fragcoord_coeff(setup); + } + else { + uint j; + switch (interp[vertSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + + default: + /* invalid interp mode */ + assert(0); + } } } } @@ -910,14 +993,15 @@ setup_line(struct draw_stage *stage, struct prim_header *prim) static void -point_persp_coeff(struct setup_stage *setup, const struct vertex_header *vert, - uint slot, uint i) +point_persp_coeff(struct setup_stage *setup, + const struct vertex_header *vert, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - assert(slot < PIPE_MAX_SHADER_INPUTS); assert(i <= 3); - setup->coef[slot].dadx[i] = 0.0F; - setup->coef[slot].dady[i] = 0.0F; - setup->coef[slot].a0[i] = vert->data[slot][i] * vert->data[0][3]; + coef->dadx[i] = 0.0F; + coef->dady[i] = 0.0F; + coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3]; } @@ -930,6 +1014,7 @@ static void setup_point(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode; const struct vertex_header *v0 = prim->v[0]; const int sizeAttr = setup->softpipe->psize_slot; @@ -940,7 +1025,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; const float x = v0->data[0][0]; /* Note: data[0] is always position */ const float y = v0->data[0][1]; - unsigned slot, j; + uint fragSlot; /* For points, all interpolants are constant-valued. * However, for point sprites, we'll need to setup texcoords appropriately. @@ -959,22 +1044,36 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) * probably should be ruled out on that basis. */ setup->vprovoke = prim->v[0]; - const_coeff(setup, 0, 2); - const_coeff(setup, 0, 3); - for (slot = 1; slot < setup->quad.nr_attrs; slot++) { - switch (interp[slot]) { - case INTERP_CONSTANT: - /* fall-through */ - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, slot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - point_persp_coeff(setup, v0, slot, j); - break; - default: - assert(0); + + /* setup Z, W */ + const_coeff(setup, &setup->posCoef, 0, 2); + const_coeff(setup, &setup->posCoef, 0, 3); + + for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) { + /* which vertex output maps to this fragment input: */ + uint vertSlot = fs->input_map[fragSlot]; + + if (vertSlot == 0) { + /* special case: shader is reading gl_FragCoord */ + setup_fragcoord_coeff(setup); + } + else { + uint j; + switch (interp[vertSlot]) { + case INTERP_CONSTANT: + /* fall-through */ + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + point_persp_coeff(setup, setup->vprovoke, + &setup->coef[fragSlot], vertSlot, j); + break; + default: + assert(0); + } } } @@ -1108,9 +1207,12 @@ static void setup_begin( struct draw_stage *stage ) { struct setup_stage *setup = setup_stage(stage); struct softpipe_context *sp = setup->softpipe; + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; setup->quad.nr_attrs = setup->softpipe->nr_frag_attrs; + setup->firstFpInput = fs->input_semantic_name[0]; + sp->quad.first->begin(sp->quad.first); } @@ -1151,6 +1253,7 @@ struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) setup->stage.destroy = render_destroy; setup->quad.coef = setup->coef; + setup->quad.posCoef = &setup->posCoef; return &setup->stage; } diff --git a/src/mesa/pipe/softpipe/sp_quad_earlyz.c b/src/mesa/pipe/softpipe/sp_quad_earlyz.c index 3abd1f1fb9..22ea99049f 100644 --- a/src/mesa/pipe/softpipe/sp_quad_earlyz.c +++ b/src/mesa/pipe/softpipe/sp_quad_earlyz.c @@ -47,9 +47,9 @@ earlyz_quad( { const float fx = (float) quad->x0; const float fy = (float) quad->y0; - const float dzdx = quad->coef[0].dadx[2]; - const float dzdy = quad->coef[0].dady[2]; - const float z0 = quad->coef[0].a0[2] + dzdx * fx + dzdy * fy; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; quad->outputs.depth[0] = z0; quad->outputs.depth[1] = z0 + dzdx; diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c index 251b47341a..6e7e7eb074 100644 --- a/src/mesa/pipe/softpipe/sp_quad_fs.c +++ b/src/mesa/pipe/softpipe/sp_quad_fs.c @@ -74,15 +74,49 @@ quad_shade_stage(struct quad_stage *qs) } +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * Note that we only need to "compute" X and Y for the upper-left fragment. + * We could do less work if we're not depth testing, or there's no + * perspective-corrected attributes, but that's seldom. + */ +static void +setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + /* do Y */ + quadpos->xyzw[1].f[0] = y; + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + + typedef void (XSTDCALL *codegen_function)( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, float (*constant)[4], struct tgsi_exec_vector *temporary, - const struct tgsi_interp_coef *coef ); + const struct tgsi_interp_coef *coef +#if 0 + ,const struct tgsi_exec_vector *quadPos +#endif + ); + -/* This should be done by the fragment shader execution unit (code - * generated from the decl instructions). Do it here for now. +/** + * Execute fragment shader for the four fragments in the quad. */ static void shade_quad( @@ -91,28 +125,15 @@ shade_quad( { struct quad_shade_stage *qss = quad_shade_stage( qs ); struct softpipe_context *softpipe = qs->softpipe; - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; struct tgsi_exec_machine *machine = &qss->machine; - /* Consts does not require 16 byte alignment. */ + /* Consts do not require 16 byte alignment. */ machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; machine->InterpCoefs = quad->coef; - machine->Inputs[0].xyzw[0].f[0] = fx; - machine->Inputs[0].xyzw[0].f[1] = fx + 1.0f; - machine->Inputs[0].xyzw[0].f[2] = fx; - machine->Inputs[0].xyzw[0].f[3] = fx + 1.0f; - - /* XXX for OpenGL we need to invert the Y pos here (y=0=top). - * but that'll mess up linear/perspective interpolation of other - * attributes... - */ - machine->Inputs[0].xyzw[1].f[0] = fy; - machine->Inputs[0].xyzw[1].f[1] = fy; - machine->Inputs[0].xyzw[1].f[2] = fy + 1.0f; - machine->Inputs[0].xyzw[1].f[3] = fy + 1.0f; + /* Compute X, Y, Z, W vals for this quad */ + setup_pos_vector(quad->posCoef, quad->x0, quad->y0, &machine->QuadPos); /* run shader */ #if defined(__i386__) || defined(__386__) @@ -123,7 +144,11 @@ shade_quad( machine->Outputs, machine->Consts, machine->Temps, - machine->InterpCoefs ); + machine->InterpCoefs +#if 0 + ,machine->QuadPos +#endif + ); quad->mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); } else diff --git a/src/mesa/pipe/softpipe/sp_quad_stipple.c b/src/mesa/pipe/softpipe/sp_quad_stipple.c index 04d95989c4..0c42963dfe 100644 --- a/src/mesa/pipe/softpipe/sp_quad_stipple.c +++ b/src/mesa/pipe/softpipe/sp_quad_stipple.c @@ -22,10 +22,18 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) if (quad->prim == PRIM_TRI) { struct softpipe_context *softpipe = qs->softpipe; /* need to invert Y to index into OpenGL's stipple pattern */ - const int y0 = softpipe->framebuffer.cbufs[0]->height - 1 - quad->y0; - const int y1 = y0 - 1; - const unsigned stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; - const unsigned stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + int y0, y1; + uint stipple0, stipple1; + if (softpipe->rasterizer->origin_lower_left) { + y0 = softpipe->framebuffer.cbufs[0]->height - 1 - quad->y0; + y1 = y0 - 1; + } + else { + y0 = quad->y0; + y1 = y0 + 1; + } + stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; #if 1 const int col0 = quad->x0 % 32; diff --git a/src/mesa/pipe/softpipe/sp_state_derived.c b/src/mesa/pipe/softpipe/sp_state_derived.c index c4f1a0a01a..736ac1c33b 100644 --- a/src/mesa/pipe/softpipe/sp_state_derived.c +++ b/src/mesa/pipe/softpipe/sp_state_derived.c @@ -51,18 +51,11 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) memset(vinfo, 0, sizeof(*vinfo)); - if (softpipe->depth_stencil->depth.enabled) - softpipe->need_z = TRUE; - else - softpipe->need_z = FALSE; - softpipe->need_w = FALSE; if (fs->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { /* Need Z if depth test is enabled or the fragment program uses the * fragment position (XYZW). */ - softpipe->need_z = TRUE; - softpipe->need_w = TRUE; } softpipe->psize_slot = -1; @@ -121,7 +114,6 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ draw_emit_vertex_attr(vinfo, FORMAT_4F, INTERP_PERSPECTIVE); - softpipe->need_w = TRUE; break; default: @@ -129,7 +121,11 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) } } +#if 00 softpipe->nr_frag_attrs = vinfo->num_attribs; +#else + softpipe->nr_frag_attrs = fs->num_inputs; +#endif /* We want these after all other attribs since they won't get passed * to the fragment shader. All prior vertex output attribs should match diff --git a/src/mesa/pipe/softpipe/sp_texture.c b/src/mesa/pipe/softpipe/sp_texture.c index 2dd1add6f7..44512e4281 100644 --- a/src/mesa/pipe/softpipe/sp_texture.c +++ b/src/mesa/pipe/softpipe/sp_texture.c @@ -52,10 +52,6 @@ static unsigned minify( unsigned d ) return MAX2(1, d>>1); } -static int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} static void diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c index 8636271a34..1f43f3643e 100644 --- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c +++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c @@ -1352,8 +1352,8 @@ linear_interpolation( unsigned attrib, unsigned chan ) { - const float x = mach->Inputs[0].xyzw[0].f[0]; - const float y = mach->Inputs[0].xyzw[1].f[0]; + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; const float dadx = mach->InterpCoefs[attrib].dadx[chan]; const float dady = mach->InterpCoefs[attrib].dady[chan]; const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; @@ -1369,15 +1369,17 @@ perspective_interpolation( unsigned attrib, unsigned chan ) { - const float x = mach->Inputs[0].xyzw[0].f[0]; - const float y = mach->Inputs[0].xyzw[1].f[0]; + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; const float dadx = mach->InterpCoefs[attrib].dadx[chan]; const float dady = mach->InterpCoefs[attrib].dady[chan]; const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / mach->Inputs[0].xyzw[3].f[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / mach->Inputs[0].xyzw[3].f[1]; - mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / mach->Inputs[0].xyzw[3].f[2]; - mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / mach->Inputs[0].xyzw[3].f[3]; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; } @@ -1402,17 +1404,6 @@ exec_declaration( last = decl->u.DeclarationRange.Last; mask = decl->Declaration.UsageMask; - /* Do not touch WPOS.xy */ - if( first == 0 ) { - mask &= ~TGSI_WRITEMASK_XY; - if( mask == TGSI_WRITEMASK_NONE ) { - first++; - if( first > last ) { - return; - } - } - } - switch( decl->Interpolation.Interpolate ) { case TGSI_INTERPOLATE_CONSTANT: interp = constant_interpolation; diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.h b/src/mesa/pipe/tgsi/exec/tgsi_exec.h index e7952a08e3..db92e282df 100644 --- a/src/mesa/pipe/tgsi/exec/tgsi_exec.h +++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.h @@ -177,6 +177,7 @@ struct tgsi_exec_machine /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; + struct tgsi_exec_vector QuadPos; /* Conditional execution masks */ uint CondMask; /**< For IF/ELSE/ENDIF */ diff --git a/src/mesa/pipe/xlib/xm_winsys_aub.c b/src/mesa/pipe/xlib/xm_winsys_aub.c index ee3c2d6181..0348c2ad40 100644 --- a/src/mesa/pipe/xlib/xm_winsys_aub.c +++ b/src/mesa/pipe/xlib/xm_winsys_aub.c @@ -493,6 +493,13 @@ static void aub_i965_batch_reloc( struct brw_winsys *sws, iws->data[iws->nr++] = aub_bo(buf)->offset + delta; } +static unsigned aub_i965_get_buffer_offset( struct brw_winsys *sws, + struct pipe_buffer_handle *buf, + unsigned access_flags ) +{ + return aub_bo(buf)->offset; +} + static void aub_i965_batch_flush( struct brw_winsys *sws, @@ -605,6 +612,7 @@ xmesa_create_i965simple( struct pipe_winsys *winsys ) iws->winsys.batch_reloc = aub_i965_batch_reloc; iws->winsys.batch_flush = aub_i965_batch_flush; iws->winsys.buffer_subdata_typed = aub_i965_buffer_subdata_typed; + iws->winsys.get_buffer_offset = aub_i965_get_buffer_offset; iws->pipe_winsys = winsys; diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 2a7128dd27..5c6b89d78c 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -77,6 +77,8 @@ static void update_raster_state( struct st_context *st ) uint i; memset(&raster, 0, sizeof(raster)); + + raster.origin_lower_left = 1; /* Always true for OpenGL */ /* _NEW_POLYGON, _NEW_BUFFERS */ diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 052b6dd144..6241e70b55 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -35,6 +35,7 @@ #include "st_context.h" #include "st_cache.h" #include "st_atom.h" +#include "st_program.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" @@ -116,17 +117,23 @@ gl_filter_to_img_filter(GLenum filter) static void update_samplers(struct st_context *st) { - GLuint u; + const struct st_fragment_program *fs = st->fp; + GLuint su; - for (u = 0; u < st->ctx->Const.MaxTextureImageUnits; u++) { - const struct gl_texture_object *texobj - = st->ctx->Texture.Unit[u]._Current; + /* loop over sampler units (aka tex image units) */ + for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { struct pipe_sampler_state sampler; const struct cso_sampler *cso; memset(&sampler, 0, sizeof(sampler)); - if (texobj) { + if (fs->Base.Base.SamplersUsed & (1 << su)) { + GLuint texUnit = fs->Base.Base.SamplerUnits[su]; + const struct gl_texture_object *texobj + = st->ctx->Texture.Unit[texUnit]._Current; + + assert(texobj); + sampler.wrap_s = gl_wrap_to_sp(texobj->WrapS); sampler.wrap_t = gl_wrap_to_sp(texobj->WrapT); sampler.wrap_r = gl_wrap_to_sp(texobj->WrapR); @@ -138,7 +145,7 @@ update_samplers(struct st_context *st) if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB) sampler.normalized_coords = 1; - sampler.lod_bias = st->ctx->Texture.Unit[u].LodBias; + sampler.lod_bias = st->ctx->Texture.Unit[su].LodBias; #if 1 sampler.min_lod = texobj->MinLod; sampler.max_lod = texobj->MaxLod; @@ -166,10 +173,10 @@ update_samplers(struct st_context *st) cso = st_cached_sampler_state(st, &sampler); - if (cso != st->state.sampler[u]) { + if (cso != st->state.sampler[su]) { /* state has changed */ - st->state.sampler[u] = cso; - st->pipe->bind_sampler_state(st->pipe, u, cso->data); + st->state.sampler[su] = cso; + st->pipe->bind_sampler_state(st->pipe, su, cso->data); } } } diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 4ec10badad..33372b0f39 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -151,8 +151,7 @@ find_translated_vp(struct st_context *st, { static const GLuint UNUSED = ~0; struct translated_vertex_program *xvp; - const GLbitfield fragInputsRead - = stfp->Base.Base.InputsRead | FRAG_BIT_WPOS; + const GLbitfield fragInputsRead = stfp->Base.Base.InputsRead; /* * Translate fragment program if needed. @@ -206,6 +205,7 @@ find_translated_vp(struct st_context *st, if (xvp->serialNo != stvp->serialNo) { GLuint outAttr, dummySlot; const GLbitfield outputsWritten = stvp->Base.Base.OutputsWritten; + GLuint numVpOuts = 0; /* Compute mapping of vertex program outputs to slots, which depends * on the fragment program's input->slot mapping. @@ -214,11 +214,24 @@ find_translated_vp(struct st_context *st, /* set default: */ xvp->output_to_slot[outAttr] = UNUSED; - if (outputsWritten & (1 << outAttr)) { + if (outAttr == VERT_RESULT_HPOS) { + /* always put xformed position into slot zero */ + xvp->output_to_slot[VERT_RESULT_HPOS] = 0; + numVpOuts++; + } + else if (outputsWritten & (1 << outAttr)) { /* see if the frag prog wants this vert output */ - GLint fpIn = vp_out_to_fp_in(outAttr); - if (fpIn >= 0) { - xvp->output_to_slot[outAttr] = stfp->input_to_slot[fpIn]; + GLint fpInAttrib = vp_out_to_fp_in(outAttr); + if (fpInAttrib >= 0) { + GLuint fpInSlot = stfp->input_to_slot[fpInAttrib]; + GLuint vpOutSlot = stfp->fs->state.input_map[fpInSlot]; + xvp->output_to_slot[outAttr] = vpOutSlot; + numVpOuts++; + } + else if (outAttr == VERT_RESULT_BFC0 || + outAttr == VERT_RESULT_BFC1) { + /* backface colors go into last slots */ + xvp->output_to_slot[outAttr] = numVpOuts++; } } } diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 254740ff20..c40f75417f 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -243,6 +243,10 @@ st_new_renderbuffer_fb(enum pipe_format format) strb->Base.InternalFormat = GL_DEPTH24_STENCIL8_EXT; strb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT; break; + case PIPE_FORMAT_S8_UNORM: + strb->Base.InternalFormat = GL_STENCIL_INDEX8_EXT; + strb->Base._BaseFormat = GL_STENCIL_INDEX; + break; case PIPE_FORMAT_R16G16B16A16_SNORM: strb->Base.InternalFormat = GL_RGBA16; strb->Base._BaseFormat = GL_RGBA; diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index b392edf16d..bccabd8004 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -585,6 +585,20 @@ make_temp_decl( } +static struct tgsi_full_declaration +make_sampler_decl(GLuint index) +{ + struct tgsi_full_declaration decl; + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.Declaration.Declare = TGSI_DECLARE_RANGE; + decl.u.DeclarationRange.First = index; + decl.u.DeclarationRange.Last = index; + return decl; +} + + + /** * Find the temporaries which are used in the given program. */ @@ -675,44 +689,22 @@ tgsi_translate_mesa_program( if (procType == TGSI_PROCESSOR_FRAGMENT) { for (i = 0; i < numInputs; i++) { struct tgsi_full_declaration fulldecl; - switch (inputSemanticName[i]) { - case TGSI_SEMANTIC_POSITION: - /* Fragment XY pos */ - fulldecl = make_input_decl(i, - GL_TRUE, TGSI_INTERPOLATE_CONSTANT, - TGSI_WRITEMASK_XY, - GL_TRUE, TGSI_SEMANTIC_POSITION, 0 ); - ti += tgsi_build_full_declaration( - &fulldecl, - &tokens[ti], - header, - maxTokens - ti ); - /* Fragment ZW pos */ - fulldecl = make_input_decl(i, - GL_TRUE, TGSI_INTERPOLATE_LINEAR, - TGSI_WRITEMASK_ZW, - GL_TRUE, TGSI_SEMANTIC_POSITION, 0 ); - ti += tgsi_build_full_declaration(&fulldecl, - &tokens[ti], - header, - maxTokens - ti ); - break; - default: - fulldecl = make_input_decl(i, - GL_TRUE, interpMode[i], - TGSI_WRITEMASK_XYZW, - GL_TRUE, inputSemanticName[i], - inputSemanticIndex[i]); - ti += tgsi_build_full_declaration(&fulldecl, - &tokens[ti], - header, - maxTokens - ti ); - break; - } + fulldecl = make_input_decl(i, + GL_TRUE, interpMode[i], + TGSI_WRITEMASK_XYZW, + GL_TRUE, inputSemanticName[i], + inputSemanticIndex[i]); + ti += tgsi_build_full_declaration(&fulldecl, + &tokens[ti], + header, + maxTokens - ti ); } } else { /* vertex prog */ + /* XXX: this could probaby be merged with the clause above. + * the only difference is the semantic tags. + */ for (i = 0; i < numInputs; i++) { struct tgsi_full_declaration fulldecl; fulldecl = make_input_decl(i, @@ -810,6 +802,19 @@ tgsi_translate_mesa_program( } } + /* texture samplers */ + for (i = 0; i < 8; i++) { + if (program->SamplersUsed & (1 << i)) { + struct tgsi_full_declaration fulldecl; + fulldecl = make_sampler_decl( i ); + ti += tgsi_build_full_declaration(&fulldecl, + &tokens[ti], + header, + maxTokens - ti ); + } + } + + for( i = 0; i < program->NumInstructions; i++ ) { compile_instruction( &program->Instructions[i], diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index e64bf14d56..fe22233c93 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -47,7 +47,7 @@ #include "st_mesa_to_tgsi.h" -#define TGSI_DEBUG 0 +#define TGSI_DEBUG 01 /** @@ -283,16 +283,17 @@ st_translate_fragment_program(struct st_context *st, const struct cso_fragment_shader *cso; GLuint interpMode[16]; /* XXX size? */ GLuint attr; - GLbitfield inputsRead = stfp->Base.Base.InputsRead; - - /* For software rendering, we always need the fragment input position - * in order to calculate interpolated values. - * For i915, we always want to emit the semantic info for position. - */ - inputsRead |= FRAG_BIT_WPOS; + const GLbitfield inputsRead = stfp->Base.Base.InputsRead; + GLuint vslot = 0; memset(&fs, 0, sizeof(fs)); + /* which vertex output goes to the first fragment input: */ + if (inputsRead & FRAG_BIT_WPOS) + vslot = 0; + else + vslot = 1; + /* * Convert Mesa program inputs to TGSI input register semantics. */ @@ -300,15 +301,17 @@ st_translate_fragment_program(struct st_context *st, if (inputsRead & (1 << attr)) { const GLuint slot = fs.num_inputs; - fs.num_inputs++; - defaultInputMapping[attr] = slot; + fs.input_map[slot] = vslot++; + + fs.num_inputs++; + switch (attr) { case FRAG_ATTRIB_WPOS: fs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; fs.input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; break; case FRAG_ATTRIB_COL0: fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; |