summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--progs/vp/vp-tris.c6
-rw-r--r--src/mesa/main/texenvprogram.c8
-rw-r--r--src/mesa/pipe/i915simple/i915_fpc_translate.c5
-rw-r--r--src/mesa/pipe/i915simple/i915_texture.c4
-rw-r--r--src/mesa/pipe/i965simple/Makefile4
-rw-r--r--src/mesa/pipe/i965simple/brw_batch.h3
-rw-r--r--src/mesa/pipe/i965simple/brw_cc.c2
-rw-r--r--src/mesa/pipe/i965simple/brw_clip_state.c4
-rw-r--r--src/mesa/pipe/i965simple/brw_context.c1
-rw-r--r--src/mesa/pipe/i965simple/brw_context.h101
-rw-r--r--src/mesa/pipe/i965simple/brw_curbe.c79
-rw-r--r--src/mesa/pipe/i965simple/brw_draw.c1
-rw-r--r--src/mesa/pipe/i965simple/brw_draw_upload.c21
-rw-r--r--src/mesa/pipe/i965simple/brw_eu.h11
-rw-r--r--src/mesa/pipe/i965simple/brw_eu_emit.c18
-rw-r--r--src/mesa/pipe/i965simple/brw_gs_state.c3
-rw-r--r--src/mesa/pipe/i965simple/brw_misc_state.c10
-rw-r--r--src/mesa/pipe/i965simple/brw_sf.c213
-rw-r--r--src/mesa/pipe/i965simple/brw_sf.h17
-rw-r--r--src/mesa/pipe/i965simple/brw_sf_emit.c340
-rw-r--r--src/mesa/pipe/i965simple/brw_sf_state.c124
-rw-r--r--src/mesa/pipe/i965simple/brw_shader_info.c49
-rw-r--r--src/mesa/pipe/i965simple/brw_state.c19
-rw-r--r--src/mesa/pipe/i965simple/brw_state.h7
-rw-r--r--src/mesa/pipe/i965simple/brw_state_cache.c5
-rw-r--r--src/mesa/pipe/i965simple/brw_state_pool.c9
-rw-r--r--src/mesa/pipe/i965simple/brw_state_upload.c41
-rw-r--r--src/mesa/pipe/i965simple/brw_tex_layout.c26
-rw-r--r--src/mesa/pipe/i965simple/brw_vs.c10
-rw-r--r--src/mesa/pipe/i965simple/brw_vs_constval.c223
-rw-r--r--src/mesa/pipe/i965simple/brw_vs_emit.c38
-rw-r--r--src/mesa/pipe/i965simple/brw_vs_state.c2
-rw-r--r--src/mesa/pipe/i965simple/brw_vtbl.c149
-rw-r--r--src/mesa/pipe/i965simple/brw_winsys.h9
-rw-r--r--src/mesa/pipe/i965simple/brw_wm.c169
-rw-r--r--src/mesa/pipe/i965simple/brw_wm.h184
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_decl.c377
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_fp.c1007
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_glsl.c2141
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_sampler_state.c9
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_state.c50
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_surface_state.c101
-rw-r--r--src/mesa/pipe/p_state.h2
-rw-r--r--src/mesa/pipe/p_util.h5
-rw-r--r--src/mesa/pipe/softpipe/sp_context.c2
-rw-r--r--src/mesa/pipe/softpipe/sp_context.h2
-rw-r--r--src/mesa/pipe/softpipe/sp_headers.h1
-rw-r--r--src/mesa/pipe/softpipe/sp_prim_setup.c347
-rw-r--r--src/mesa/pipe/softpipe/sp_quad_earlyz.c6
-rw-r--r--src/mesa/pipe/softpipe/sp_quad_fs.c65
-rw-r--r--src/mesa/pipe/softpipe/sp_quad_stipple.c16
-rw-r--r--src/mesa/pipe/softpipe/sp_state_derived.c12
-rw-r--r--src/mesa/pipe/softpipe/sp_texture.c4
-rw-r--r--src/mesa/pipe/tgsi/exec/tgsi_exec.c29
-rw-r--r--src/mesa/pipe/tgsi/exec/tgsi_exec.h1
-rw-r--r--src/mesa/pipe/xlib/xm_winsys_aub.c8
-rw-r--r--src/mesa/state_tracker/st_atom_rasterizer.c2
-rw-r--r--src/mesa/state_tracker/st_atom_sampler.c25
-rw-r--r--src/mesa/state_tracker/st_atom_shader.c25
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.c4
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c73
-rw-r--r--src/mesa/state_tracker/st_program.c25
62 files changed, 2325 insertions, 3929 deletions
diff --git a/progs/vp/vp-tris.c b/progs/vp/vp-tris.c
index e5be65e78c..f9e6cdad74 100644
--- a/progs/vp/vp-tris.c
+++ b/progs/vp/vp-tris.c
@@ -90,7 +90,9 @@ static void Init( void )
}
fprintf(stderr, "%.*s\n", sz, buf);
-
+
+ glEnable(GL_VERTEX_PROGRAM_NV);
+
glGenProgramsARB(1, &prognum);
glBindProgramARB(GL_VERTEX_PROGRAM_ARB, prognum);
@@ -168,8 +170,6 @@ static void Display( void )
glClearColor(0.3, 0.3, 0.3, 1);
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
- glEnable(GL_VERTEX_PROGRAM_NV);
-
glBegin(GL_TRIANGLES);
diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c
index efb3b35f6a..d866d10017 100644
--- a/src/mesa/main/texenvprogram.c
+++ b/src/mesa/main/texenvprogram.c
@@ -936,10 +936,16 @@ static void load_texture( struct texenv_fragment_program *p, GLuint unit )
/* TODO: Use D0_MASK_XY where possible.
*/
- if (p->state->unit[unit].enabled)
+ if (p->state->unit[unit].enabled) {
p->src_texture[unit] = emit_texld( p, OPCODE_TXP,
tmp, WRITEMASK_XYZW,
unit, dim, texcoord );
+ p->program->Base.SamplersUsed |= (1 << unit);
+ /* This identity mapping should already be in place
+ * (see _mesa_init_program_struct()) but let's be safe.
+ */
+ p->program->Base.SamplerUnits[unit] = unit;
+ }
else
p->src_texture[unit] = get_zero(p);
}
diff --git a/src/mesa/pipe/i915simple/i915_fpc_translate.c b/src/mesa/pipe/i915simple/i915_fpc_translate.c
index 1cd554250c..d517b88acc 100644
--- a/src/mesa/pipe/i915simple/i915_fpc_translate.c
+++ b/src/mesa/pipe/i915simple/i915_fpc_translate.c
@@ -928,8 +928,9 @@ i915_translate_instructions(struct i915_fp_compile *p,
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* XXX no-op? */
- assert(0);
+ /* This is a no-op. We'll get immediates from the usual constant/
+ * uniform buffer.
+ */
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/mesa/pipe/i915simple/i915_texture.c b/src/mesa/pipe/i915simple/i915_texture.c
index fefd105adf..44f72e63cc 100644
--- a/src/mesa/pipe/i915simple/i915_texture.c
+++ b/src/mesa/pipe/i915simple/i915_texture.c
@@ -47,10 +47,6 @@ static unsigned minify( unsigned d )
return MAX2(1, d>>1);
}
-static int align(int value, int alignment)
-{
- return (value + alignment - 1) & ~(alignment - 1);
-}
static void
diff --git a/src/mesa/pipe/i965simple/Makefile b/src/mesa/pipe/i965simple/Makefile
index eda5afaea5..48c00ab50b 100644
--- a/src/mesa/pipe/i965simple/Makefile
+++ b/src/mesa/pipe/i965simple/Makefile
@@ -31,6 +31,7 @@ DRIVER_SOURCES = \
brw_sf.c \
brw_sf_emit.c \
brw_sf_state.c \
+ brw_shader_info.c \
brw_state.c \
brw_state_batch.c \
brw_state_cache.c \
@@ -40,12 +41,11 @@ DRIVER_SOURCES = \
brw_urb.c \
brw_util.c \
brw_vs.c \
- brw_vs_constval.c \
brw_vs_emit.c \
brw_vs_state.c \
- brw_vtbl.c \
brw_wm.c \
brw_wm_iz.c \
+ brw_wm_decl.c \
brw_wm_glsl.c \
brw_wm_sampler_state.c \
brw_wm_state.c \
diff --git a/src/mesa/pipe/i965simple/brw_batch.h b/src/mesa/pipe/i965simple/brw_batch.h
index 7c778f360b..bef69ac871 100644
--- a/src/mesa/pipe/i965simple/brw_batch.h
+++ b/src/mesa/pipe/i965simple/brw_batch.h
@@ -36,7 +36,7 @@
#define INTEL_BATCH_CLIPRECTS 0x2
#define BEGIN_BATCH( dwords, relocs ) \
- (brw->batch_start = brw->winsys->batch_start(brw->winsys, dwords, relocs))
+ brw->winsys->batch_start(brw->winsys, dwords, relocs)
#define OUT_BATCH( dword ) \
brw->winsys->batch_dword(brw->winsys, dword)
@@ -50,7 +50,6 @@
*/
#define FLUSH_BATCH(fence) do { \
brw->winsys->batch_flush(brw->winsys, fence); \
- brw->batch_start = NULL; \
brw->hardware_dirty = ~0; \
} while (0)
diff --git a/src/mesa/pipe/i965simple/brw_cc.c b/src/mesa/pipe/i965simple/brw_cc.c
index fc7fdba53f..6cc1505311 100644
--- a/src/mesa/pipe/i965simple/brw_cc.c
+++ b/src/mesa/pipe/i965simple/brw_cc.c
@@ -142,7 +142,7 @@ static void upload_cc_vp( struct brw_context *brw )
const struct brw_tracked_state brw_cc_vp = {
.dirty = {
- .brw = BRW_NEW_CONTEXT,
+ .brw = BRW_NEW_SCENE,
.cache = 0
},
.update = upload_cc_vp
diff --git a/src/mesa/pipe/i965simple/brw_clip_state.c b/src/mesa/pipe/i965simple/brw_clip_state.c
index 51a4666a0b..ea5c05a279 100644
--- a/src/mesa/pipe/i965simple/brw_clip_state.c
+++ b/src/mesa/pipe/i965simple/brw_clip_state.c
@@ -32,7 +32,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-
+#include "pipe/p_util.h"
static void upload_clip_unit( struct brw_context *brw )
@@ -43,7 +43,7 @@ static void upload_clip_unit( struct brw_context *brw )
/* CACHE_NEW_CLIP_PROG */
clip.thread0.grf_reg_count =
- ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1;
+ align(brw->clip.prog_data->total_grf, 16) / 16 - 1;
clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6;
clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length;
diff --git a/src/mesa/pipe/i965simple/brw_context.c b/src/mesa/pipe/i965simple/brw_context.c
index e69ba6938e..5e58701e91 100644
--- a/src/mesa/pipe/i965simple/brw_context.c
+++ b/src/mesa/pipe/i965simple/brw_context.c
@@ -237,7 +237,6 @@ struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys,
brw->pci_id = pci_id;
brw->dirty = ~0;
brw->hardware_dirty = ~0;
- brw->batch_start = NULL;
memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind));
diff --git a/src/mesa/pipe/i965simple/brw_context.h b/src/mesa/pipe/i965simple/brw_context.h
index 53f66cd6a9..318c6a7049 100644
--- a/src/mesa/pipe/i965simple/brw_context.h
+++ b/src/mesa/pipe/i965simple/brw_context.h
@@ -119,7 +119,6 @@
* Handles blending and (presumably) depth and stencil testing.
*/
-#define BRW_FALLBACK_TEXTURE 0x1
#define BRW_MAX_CURBE (32*16)
struct brw_context;
@@ -147,16 +146,13 @@ struct brw_winsys;
/* Raised for other internal events:
*/
#define BRW_NEW_URB_FENCE 0x10000
-#define BRW_NEW_INPUT_DIMENSIONS 0x20000
+#define BRW_NEW_PSP 0x20000
#define BRW_NEW_CURBE_OFFSETS 0x40000
#define BRW_NEW_REDUCED_PRIMITIVE 0x80000
#define BRW_NEW_PRIMITIVE 0x100000
-#define BRW_NEW_CONTEXT 0x200000
-#define BRW_NEW_WM_INPUT_DIMENSIONS 0x400000
-#define BRW_NEW_INPUT_VARYING 0x800000
-#define BRW_NEW_PSP 0x1000000
+#define BRW_NEW_SCENE 0x200000
+#define BRW_NEW_SF_LINKAGE 0x400000
-#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1))
extern int BRW_DEBUG;
#define DEBUG_TEXTURE 0x1
@@ -198,23 +194,47 @@ struct brw_state_flags {
unsigned brw;
};
+
+struct brw_shader_info {
+ int nr_regs[8]; /* TGSI_FILE_* */
+};
+
+
+
struct brw_vertex_program {
struct pipe_shader_state program;
- unsigned id;
- unsigned param_state; /* flags indicating state tracked by params */
+ struct brw_shader_info info;
+ int id;
};
struct brw_fragment_program {
struct pipe_shader_state program;
- unsigned id;
- unsigned param_state; /* flags indicating state tracked by params */
+ struct brw_shader_info info;
+
+ boolean UsesDepth;
boolean UsesKill;
boolean ComputesDepth;
+ int id;
};
+
+
+struct pipe_setup_linkage { /* VS-output -> FS-input mapping; filled in by update_sf_linkage() */
+ struct {
+ unsigned vp_output:5; /* index of the VS output with the same semantic; all-ones if none matched */
+ unsigned interp_mode:4; /* Interpolation.Interpolate value copied from the FS input declaration */
+ unsigned bf_vp_output:5; /* index of the matching BCOLOR VS output (COLOR inputs only); all-ones otherwise */
+ } fp_input[PIPE_MAX_SHADER_INPUTS];
+
+ unsigned fp_input_count:5; /* derived from the FS input declaration ranges */
+ unsigned max_vp_output:5; /* NOTE(review): not written by any code visible in this change -- confirm intended use */
+};
+
+
+
struct brw_texture {
struct pipe_texture base;
@@ -248,6 +268,12 @@ struct brw_texture {
* corresponding to a different brw_wm_prog_key struct, with different
* compiled programs:
*/
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+
struct brw_wm_prog_data {
unsigned curb_read_length;
unsigned urb_read_length;
@@ -256,13 +282,14 @@ struct brw_wm_prog_data {
unsigned total_grf;
unsigned total_scratch;
- unsigned nr_params;
- boolean error;
-
- /* Pointer to tracked values (only valid once
- * _mesa_load_state_parameters has been called at runtime).
+ /* Internally generated constants for the CURBE. These are loaded
+ * ahead of the data from the constant buffer.
*/
- const float *param[BRW_MAX_CURBE];
+ const float internal_const[8];
+ unsigned nr_internal_consts;
+ unsigned max_const;
+
+ boolean error;
};
struct brw_sf_prog_data {
@@ -298,19 +325,14 @@ struct brw_vs_prog_data {
unsigned inputs_read;
+ unsigned max_const;
+
/* Used for calculating urb partitions:
*/
unsigned urb_entry_size;
};
-/* Size == 0 if output either not written, or always [0,0,0,1]
- */
-struct brw_vs_ouput_sizes {
- ubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
-};
-
-
#define BRW_MAX_TEX_UNIT 8
#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1
@@ -374,8 +396,6 @@ struct brw_cache {
struct brw_tracked_state {
struct brw_state_flags dirty;
void (*update)( struct brw_context *brw );
- void (*emit_reloc)( struct brw_context *brw );
- boolean always_update;
};
@@ -455,8 +475,6 @@ struct brw_context
struct {
struct brw_state_flags dirty;
- struct brw_tracked_state **atoms;
- unsigned nr_atoms;
} state;
@@ -489,34 +507,23 @@ struct brw_context
/* Arrays with buffer objects to copy non-bufferobj arrays into
* for upload:
*/
- struct pipe_vertex_buffer vbo_array[PIPE_ATTRIB_MAX];
+ struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX];
struct brw_vertex_element inputs[PIPE_ATTRIB_MAX];
#define BRW_NR_UPLOAD_BUFS 17
#define BRW_UPLOAD_INIT_SIZE (128*1024)
- struct {
- struct pipe_buffer_handle *vbo[BRW_NR_UPLOAD_BUFS];
- unsigned buf;
- unsigned offset;
- unsigned size;
- unsigned wrap;
- } upload;
-
/* Summary of size and varying of active arrays, so we can check
* for changes to this state:
*/
struct brw_vertex_info info;
- int last_vb;
} vb;
- unsigned *batch_start;
unsigned hardware_dirty;
unsigned dirty;
unsigned pci_id;
-
/* BRW_NEW_URB_ALLOCATIONS:
*/
struct {
@@ -557,11 +564,6 @@ struct brw_context
unsigned vs_size;
unsigned total_size;
- /* Dynamic tracker which changes to reflect the state referenced
- * by active fp and vp program parameters:
- */
- struct brw_tracked_state tracked_state;
-
unsigned gs_offset;
float *last_buf;
@@ -595,6 +597,8 @@ struct brw_context
struct {
struct brw_sf_prog_data *prog_data;
+ struct pipe_setup_linkage linkage;
+
unsigned prog_gs_offset;
unsigned vp_gs_offset;
unsigned state_gs_offset;
@@ -602,11 +606,8 @@ struct brw_context
struct {
struct brw_wm_prog_data *prog_data;
- struct brw_wm_compile *compile_data;
- /* Input sizes, calculated from active vertex program:
- */
- unsigned input_size_masks[4];
+// struct brw_wm_compiler *compile_data;
/**
@@ -667,8 +668,6 @@ void brw_destroy_state(struct brw_context *brw);
* brw_tex.c
*/
void brwUpdateTextureState( struct brw_context *brw );
-void brw_FrameBufferTexInit( struct brw_context *brw );
-void brw_FrameBufferTexDestroy( struct brw_context *brw );
/* brw_urb.c
diff --git a/src/mesa/pipe/i965simple/brw_curbe.c b/src/mesa/pipe/i965simple/brw_curbe.c
index 0894e82d56..b943a7af98 100644
--- a/src/mesa/pipe/i965simple/brw_curbe.c
+++ b/src/mesa/pipe/i965simple/brw_curbe.c
@@ -35,6 +35,9 @@
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_util.h"
+#include "brw_wm.h"
+#include "pipe/p_state.h"
+#include "pipe/p_util.h"
#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
@@ -43,11 +46,10 @@
static void calculate_curbe_offsets( struct brw_context *brw )
{
/* CACHE_NEW_WM_PROG */
- unsigned nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
+ unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16);
/* BRW_NEW_VERTEX_PROGRAM */
- struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->attribs.VertexProgram;
- unsigned nr_vp_regs = (vp->program.num_inputs * 4 + 15) / 16;
+ unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16);
unsigned nr_clip_regs = 0;
unsigned total_regs;
@@ -55,7 +57,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
/* BRW_NEW_CLIP ? */
if (brw->attribs.Transform->ClipPlanesEnabled) {
unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
- nr_clip_regs = (nr_planes * 4 + 15) / 16;
+ nr_clip_regs = align(nr_planes * 4, 16);
}
#endif
@@ -172,28 +174,18 @@ static float fixed_plane[6][4] = {
{ 1, 0, 0, 1 }
};
-#if 0
/* Upload a new set of constants. Too much variability to go into the
* cache mechanism, but maybe would benefit from a comparison against
* the current uploaded set of constants.
*/
static void upload_constant_buffer(struct brw_context *brw)
{
- GLcontext *ctx = &brw->intel.ctx;
- struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
- struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
unsigned sz = brw->curbe.total_size;
unsigned bufsz = sz * 16 * sizeof(float);
float *buf;
unsigned i;
- /* Update our own dependency flags. This works because this
- * function will also be called whenever fp or vp changes.
- */
- brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
- brw->curbe.tracked_state.dirty.mesa |= vp->param_state;
- brw->curbe.tracked_state.dirty.mesa |= fp->param_state;
if (sz == 0) {
struct brw_constant_buffer cb;
@@ -220,10 +212,16 @@ static void upload_constant_buffer(struct brw_context *brw)
if (brw->curbe.wm_size) {
unsigned offset = brw->curbe.wm_start * 16;
- _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+ /* First the constant buffer constants:
+ */
+
+ /* Then any internally generated constants:
+ */
+ for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++)
+ buf[offset + i] = brw->wm.prog_data->internal_const[i];
- for (i = 0; i < brw->wm.prog_data->nr_params; i++)
- buf[offset + i] = brw->wm.prog_data->param[i][0];
+ assert(brw->wm.prog_data->max_const ==
+ brw->wm.prog_data->nr_internal_consts);
}
@@ -243,34 +241,26 @@ static void upload_constant_buffer(struct brw_context *brw)
buf[offset + i * 4 + 3] = fixed_plane[i][3];
}
- /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
- * clip-space:
+ /* Clip planes: BRW_NEW_CLIP:
*/
- assert(MAX_CLIP_PLANES == 6);
- for (j = 0; j < MAX_CLIP_PLANES; j++) {
- if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) {
- buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0];
- buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1];
- buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2];
- buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3];
- i++;
- }
+ for (j = 0; j < brw->attribs.Clip.nr; j++) {
+ buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0];
+ buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1];
+ buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2];
+ buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3];
+ i++;
}
}
if (brw->curbe.vs_size) {
- unsigned offset = brw->curbe.vs_start * 16;
- unsigned nr = vp->program.Base.Parameters->NumParameters;
+// unsigned offset = brw->curbe.vs_start * 16;
+// unsigned nr = vp->max_const;
- _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+ /* map the vertex constant buffer and copy to curbe: */
- for (i = 0; i < nr; i++) {
- buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0];
- buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1];
- buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2];
- buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3];
- }
+// assert(nr == 0);
+ assert(0);
}
if (0) {
@@ -309,7 +299,12 @@ static void upload_constant_buffer(struct brw_context *brw)
/* Copy data to the buffer:
*/
- dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf);
+ brw->winsys->buffer_subdata_typed(brw->winsys,
+ pool->buffer,
+ brw->curbe.gs_offset,
+ bufsz,
+ buf,
+ BRW_CONSTANT_BUFFER );
}
/* TODO: only emit the constant_buffer packet when necessary, ie:
@@ -341,9 +336,7 @@ static void upload_constant_buffer(struct brw_context *brw)
* flushes as necessary when doublebuffering of CURBEs isn't
* possible.
*/
-/* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */
BRW_BATCH_STRUCT(brw, &cb);
-/* intel_batchbuffer_align(brw->intel.batch, 64, 0); */
}
}
@@ -355,9 +348,8 @@ static void upload_constant_buffer(struct brw_context *brw)
*/
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
- .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */
- .brw = (BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_VERTEX_PROGRAM |
+ .brw = (BRW_NEW_CLIP |
+ BRW_NEW_CONSTANTS |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
BRW_NEW_CURBE_OFFSETS),
@@ -366,4 +358,3 @@ const struct brw_tracked_state brw_constant_buffer = {
.update = upload_constant_buffer
};
-#endif
diff --git a/src/mesa/pipe/i965simple/brw_draw.c b/src/mesa/pipe/i965simple/brw_draw.c
index f443f41c6f..01c8ddb227 100644
--- a/src/mesa/pipe/i965simple/brw_draw.c
+++ b/src/mesa/pipe/i965simple/brw_draw.c
@@ -151,7 +151,6 @@ static boolean brw_try_draw_elements( struct pipe_context *pipe,
unsigned count )
{
struct brw_context *brw = brw_context(pipe);
- boolean retval = FALSE;
/* Set the first primitive ahead of validate_state:
*/
diff --git a/src/mesa/pipe/i965simple/brw_draw_upload.c b/src/mesa/pipe/i965simple/brw_draw_upload.c
index 186a6274fa..79144837e8 100644
--- a/src/mesa/pipe/i965simple/brw_draw_upload.c
+++ b/src/mesa/pipe/i965simple/brw_draw_upload.c
@@ -207,25 +207,28 @@ static unsigned get_index_type(int type)
boolean brw_upload_vertex_buffers( struct brw_context *brw )
{
struct brw_array_state vbp;
+ unsigned nr_enabled = 0;
unsigned i;
- int nr_enabled = brw->vb.last_vb + 1;
memset(&vbp, 0, sizeof(vbp));
/* This is a hardware limit:
*/
- if (nr_enabled >= BRW_VEP_MAX)
- return FALSE;
- for (i = 0; i < nr_enabled; i++)
+ for (i = 0; i < BRW_VEP_MAX; i++)
{
- vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i].pitch;
+ if (brw->vb.vbo_array[i]->buffer == NULL) {
+ nr_enabled = i;
+ break;
+ }
+
+ vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch;
vbp.vb[i].vb0.bits.pad = 0;
vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
vbp.vb[i].vb0.bits.vb_index = i;
- vbp.vb[i].offset = brw->vb.vbo_array[i].buffer_offset;
- vbp.vb[i].buffer = brw->vb.vbo_array[i].buffer;
- vbp.vb[i].max_index = brw->vb.vbo_array[i].max_index;
+ vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset;
+ vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer;
+ vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index;
}
@@ -260,7 +263,7 @@ boolean brw_upload_vertex_elements( struct brw_context *brw )
for (i = 0; i < nr_enabled; i++) {
struct brw_vertex_element *input = &brw->vb.inputs[i];
- switch (brw->vb.vbo_array[input->vep.ve0.vertex_buffer_index].pitch) {
+ switch (brw->vb.vbo_array[input->vep.ve0.vertex_buffer_index]->pitch) {
case 0: input->vep.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0;
case 1: input->vep.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
case 2: input->vep.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
diff --git a/src/mesa/pipe/i965simple/brw_eu.h b/src/mesa/pipe/i965simple/brw_eu.h
index 111edb1506..23151ae9ed 100644
--- a/src/mesa/pipe/i965simple/brw_eu.h
+++ b/src/mesa/pipe/i965simple/brw_eu.h
@@ -694,6 +694,17 @@ void brw_init_compile( struct brw_compile *p );
const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz );
+struct brw_instruction *brw_alu1( struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src );
+
+struct brw_instruction *brw_alu2(struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1 );
+
/* Helpers for regular instructions:
*/
#define ALU1(OP) \
diff --git a/src/mesa/pipe/i965simple/brw_eu_emit.c b/src/mesa/pipe/i965simple/brw_eu_emit.c
index bda63e8b9a..2423536dd1 100644
--- a/src/mesa/pipe/i965simple/brw_eu_emit.c
+++ b/src/mesa/pipe/i965simple/brw_eu_emit.c
@@ -363,10 +363,10 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
}
-static struct brw_instruction *brw_alu1( struct brw_compile *p,
- unsigned opcode,
- struct brw_reg dest,
- struct brw_reg src )
+struct brw_instruction *brw_alu1( struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src )
{
struct brw_instruction *insn = next_insn(p, opcode);
brw_set_dest(insn, dest);
@@ -374,11 +374,11 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p,
return insn;
}
-static struct brw_instruction *brw_alu2(struct brw_compile *p,
- unsigned opcode,
- struct brw_reg dest,
- struct brw_reg src0,
- struct brw_reg src1 )
+struct brw_instruction *brw_alu2(struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1 )
{
struct brw_instruction *insn = next_insn(p, opcode);
brw_set_dest(insn, dest);
diff --git a/src/mesa/pipe/i965simple/brw_gs_state.c b/src/mesa/pipe/i965simple/brw_gs_state.c
index 8e62eb4bd7..3932e9e939 100644
--- a/src/mesa/pipe/i965simple/brw_gs_state.c
+++ b/src/mesa/pipe/i965simple/brw_gs_state.c
@@ -34,6 +34,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
+#include "pipe/p_util.h"
@@ -46,7 +47,7 @@ static void upload_gs_unit( struct brw_context *brw )
/* CACHE_NEW_GS_PROG */
if (brw->gs.prog_active) {
gs.thread0.grf_reg_count =
- ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1;
+ align(brw->gs.prog_data->total_grf, 16) / 16 - 1;
gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6;
gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
}
diff --git a/src/mesa/pipe/i965simple/brw_misc_state.c b/src/mesa/pipe/i965simple/brw_misc_state.c
index 0750502334..e600e9d8de 100644
--- a/src/mesa/pipe/i965simple/brw_misc_state.c
+++ b/src/mesa/pipe/i965simple/brw_misc_state.c
@@ -202,9 +202,7 @@ static void upload_depthbuffer(struct brw_context *brw)
OUT_BATCH(((depth_surface->pitch * depth_surface->cpp) - 1) |
(format << 18) |
(BRW_TILEWALK_YMAJOR << 26) |
-#if 0
- (depth_surface->region->tiled << 27) |
-#endif
+// (depth_surface->region->tiled << 27) |
(BRW_SURFACE_2D << 29));
OUT_RELOC(depth_surface->buffer,
PIPE_BUFFER_FLAG_READ | PIPE_BUFFER_FLAG_WRITE, 0);
@@ -317,7 +315,7 @@ static void upload_pipe_control(struct brw_context *brw)
const struct brw_tracked_state brw_pipe_control = {
.dirty = {
- .brw = BRW_NEW_CONTEXT,
+ .brw = BRW_NEW_SCENE,
.cache = 0
},
.update = upload_pipe_control
@@ -382,7 +380,7 @@ static void upload_invarient_state( struct brw_context *brw )
const struct brw_tracked_state brw_invarient_state = {
.dirty = {
- .brw = BRW_NEW_CONTEXT,
+ .brw = BRW_NEW_SCENE,
.cache = 0
},
.update = upload_invarient_state
@@ -418,7 +416,7 @@ static void upload_state_base_address( struct brw_context *brw )
const struct brw_tracked_state brw_state_base_address = {
.dirty = {
- .brw = BRW_NEW_CONTEXT,
+ .brw = BRW_NEW_SCENE,
.cache = 0
},
.update = upload_state_base_address
diff --git a/src/mesa/pipe/i965simple/brw_sf.c b/src/mesa/pipe/i965simple/brw_sf.c
index f009ff37d9..e7c02beda5 100644
--- a/src/mesa/pipe/i965simple/brw_sf.c
+++ b/src/mesa/pipe/i965simple/brw_sf.c
@@ -36,9 +36,8 @@
#include "brw_util.h"
#include "brw_sf.h"
#include "brw_state.h"
+#include "tgsi/util/tgsi_parse.h"
-#if 0
-#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
static void compile_sf_prog( struct brw_context *brw,
struct brw_sf_prog_key *key )
@@ -46,7 +45,6 @@ static void compile_sf_prog( struct brw_context *brw,
struct brw_sf_compile c;
const unsigned *program;
unsigned program_size;
- unsigned i, idx;
memset(&c, 0, sizeof(c));
@@ -55,27 +53,17 @@ static void compile_sf_prog( struct brw_context *brw,
brw_init_compile(&c.func);
c.key = *key;
- c.nr_attrs = brw_count_bits(c.key.attrs);
+
+
+ c.nr_attrs = c.key.vp_output_count;
c.nr_attr_regs = (c.nr_attrs+1)/2;
- c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+
+ c.nr_setup_attrs = c.key.fp_input_count;
c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
c.prog_data.urb_read_length = c.nr_attr_regs;
c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
- /* Construct map from attribute number to position in the vertex.
- */
- for (i = idx = 0; i < VERT_RESULT_MAX; i++)
- if (c.key.attrs & (1<<i)) {
- c.attr_to_idx[i] = idx;
- c.idx_to_attr[idx] = i;
- if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
- c.point_attrs[i].CoordReplace =
- brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0];
- } else
- c.point_attrs[i].CoordReplace = FALSE;
- idx++;
- }
/* Which primitive? Or all three?
*/
@@ -90,21 +78,17 @@ static void compile_sf_prog( struct brw_context *brw,
break;
case SF_POINTS:
c.nr_verts = 1;
- if (key->do_point_sprite)
- brw_emit_point_sprite_setup( &c );
- else
- brw_emit_point_setup( &c );
+ brw_emit_point_setup( &c );
break;
+
case SF_UNFILLED_TRIS:
- c.nr_verts = 3;
- brw_emit_anyprim_setup( &c );
- break;
default:
assert(0);
return;
}
+
/* get the program
*/
program = brw_get_program(&c.func, &program_size);
@@ -142,20 +126,15 @@ static void upload_sf_prog( struct brw_context *brw )
/* Populate the key, noting state dependencies:
*/
/* CACHE_NEW_VS_PROG */
- key.attrs = brw->vs.prog_data->outputs_written;
+ key.vp_output_count = brw->vs.prog_data->outputs_written;
/* BRW_NEW_REDUCED_PRIMITIVE */
switch (brw->reduced_primitive) {
case PIPE_PRIM_TRIANGLES:
- /* NOTE: We just use the edgeflag attribute as an indicator that
- * unfilled triangles are active. We don't actually do the
- * edgeflag testing here, it is already done in the clip
- * program.
- */
- if (key.attrs & (1<<VERT_RESULT_EDGE))
- key.primitive = SF_UNFILLED_TRIS;
- else
- key.primitive = SF_TRIANGLES;
+// if (key.attrs & (1<<VERT_RESULT_EDGE))
+// key.primitive = SF_UNFILLED_TRIS;
+// else
+ key.primitive = SF_TRIANGLES;
break;
case PIPE_PRIM_LINES:
key.primitive = SF_LINES;
@@ -165,16 +144,15 @@ static void upload_sf_prog( struct brw_context *brw )
break;
}
- /* BRW_NEW_POINT */
- key.do_point_sprite = brw->attribs.Point->PointSprite;
- key.SpriteOrigin = brw->attribs.Point->SpriteOrigin;
- /* BRW_NEW_RASTER */
- key.do_flat_shading = (brw->attribs.Raster->flatshade);
- key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
- /* _NEW_POLYGON */
- if (key.do_twoside_color)
- key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
+// key.do_point_sprite = brw->attribs.Point->PointSprite;
+// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin;
+
+// key.do_flat_shading = (brw->attribs.Raster->flatshade);
+// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
+
+// if (key.do_twoside_color)
+// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
if (!search_cache(brw, &key))
@@ -184,11 +162,150 @@ static void upload_sf_prog( struct brw_context *brw )
const struct brw_tracked_state brw_sf_prog = {
.dirty = {
- .brw = (BRW_NEW_RASTER |
- BRW_NEW_REDUCED_PRIMITIVE),
- .cache = CACHE_NEW_VS_PROG
+ .brw = (BRW_NEW_RASTERIZER |
+ BRW_NEW_REDUCED_PRIMITIVE |
+ BRW_NEW_VS |
+ BRW_NEW_FS),
+ .cache = 0,
},
.update = upload_sf_prog
};
-#endif
+
+/**
+ * Build the VS-output -> FS-input linkage the SF stage needs (a struct like
+ * the one we'd like the state tracker to pass to us).  Raises
+ * BRW_NEW_SF_LINKAGE when the result differs from brw->sf.linkage.
+ */
+static void update_sf_linkage( struct brw_context *brw )
+{
+   const struct brw_vertex_program *vs = brw->attribs.VertexProgram;
+   const struct brw_fragment_program *fs = brw->attribs.FragmentProgram;
+   struct pipe_setup_linkage state;
+   struct tgsi_parse_context parse;
+
+   int i, j;
+   int nr_vp_outputs = 0;
+   int done = 0;
+
+   /* Semantic of each register, indexed by declared register number. */
+   struct {
+      unsigned semantic:8;
+      unsigned semantic_index:16;
+   } fp_semantic[32], vp_semantic[32];
+
+   /* Zero padding too, so the memcmp() at the end is meaningful. */
+   memset(&state, 0, sizeof(state));
+
+   /* First scan fp inputs.  Only the declarations at the head of the
+    * token stream are read; any other token type ends the scan.
+    */
+   tgsi_parse_init( &parse, fs->program.tokens );
+   while( !done &&
+          !tgsi_parse_end_of_tokens( &parse ) )
+   {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)
+         {
+            int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+            int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+
+            /* Last is inclusive; last + 1 must fit the 5-bit fp_input_count. */
+            assert(last < 31);
+            for (i = first; i <= last; i++) {
+               state.fp_input[i].vp_output = ~0;
+               state.fp_input[i].bf_vp_output = ~0;
+               state.fp_input[i].interp_mode =
+                  parse.FullToken.FullDeclaration.Interpolation.Interpolate;
+               fp_semantic[i].semantic =
+                  parse.FullToken.FullDeclaration.Semantic.SemanticName;
+               fp_semantic[i].semantic_index =
+                  parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
+            }
+            assert(last >= state.fp_input_count);
+            state.fp_input_count = last + 1;
+         }
+         break;
+      default:
+         done = 1;
+         break;
+      }
+   }
+   tgsi_parse_free( &parse );
+
+   assert(state.fp_input_count == fs->program.num_inputs);
+
+   /* Then scan vp outputs
+    */
+   done = 0;
+   tgsi_parse_init( &parse, vs->program.tokens );
+   while( !done &&
+          !tgsi_parse_end_of_tokens( &parse ) )
+   {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* Outputs, not inputs: these are what the fp inputs link to. */
+         if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)
+         {
+            int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+            int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+
+            assert(last < 32);
+            for (i = first; i <= last; i++) {
+               vp_semantic[i].semantic =
+                  parse.FullToken.FullDeclaration.Semantic.SemanticName;
+               vp_semantic[i].semantic_index =
+                  parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
+            }
+            assert(last >= nr_vp_outputs);
+            nr_vp_outputs = last + 1;
+         }
+         break;
+      default:
+         done = 1;
+         break;
+      }
+   }
+   tgsi_parse_free( &parse );
+
+   /* Now match based on semantic information.  Inputs with no matching
+    * output keep the all-ones "unlinked" value stored above.
+    */
+   for (i = 0; i< state.fp_input_count; i++) {
+      for (j = 0; j < nr_vp_outputs; j++) {
+         if (fp_semantic[i].semantic == vp_semantic[j].semantic &&
+             fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
+            state.fp_input[i].vp_output = j;
+         }
+      }
+      if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) {
+         for (j = 0; j < nr_vp_outputs; j++) {
+            if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic &&
+                fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
+               state.fp_input[i].bf_vp_output = j;
+            }
+         }
+      }
+   }
+
+   if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) {
+      brw->sf.linkage = state;
+      brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE;
+   }
+}
+
+
+const struct brw_tracked_state brw_sf_linkage = {
+ .dirty = {
+ .brw = (BRW_NEW_VS |
+ BRW_NEW_FS),
+ .cache = 0,
+ },
+ .update = update_sf_linkage
+};
+
diff --git a/src/mesa/pipe/i965simple/brw_sf.h b/src/mesa/pipe/i965simple/brw_sf.h
index d04388325d..b7ada47560 100644
--- a/src/mesa/pipe/i965simple/brw_sf.h
+++ b/src/mesa/pipe/i965simple/brw_sf.h
@@ -42,15 +42,26 @@
#define SF_TRIANGLES 2
#define SF_UNFILLED_TRIS 3
+
+
struct brw_sf_prog_key {
- unsigned attrs:32;
+ unsigned vp_output_count:5;
+ unsigned fp_input_count:5;
+
unsigned primitive:2;
unsigned do_twoside_color:1;
unsigned do_flat_shading:1;
unsigned frontface_ccw:1;
unsigned do_point_sprite:1;
- unsigned pad:10;
- int SpriteOrigin;
+
+ /* Interpolation masks;
+ */
+ unsigned linear_mask;
+ unsigned persp_mask;
+ unsigned const_mask;
+
+
+// int SpriteOrigin;
};
struct brw_sf_point_tex {
diff --git a/src/mesa/pipe/i965simple/brw_sf_emit.c b/src/mesa/pipe/i965simple/brw_sf_emit.c
index 93f23171f2..834b5efdfe 100644
--- a/src/mesa/pipe/i965simple/brw_sf_emit.c
+++ b/src/mesa/pipe/i965simple/brw_sf_emit.c
@@ -36,171 +36,6 @@
#include "brw_util.h"
#include "brw_sf.h"
-#if 0
-static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
- struct brw_reg vert,
- unsigned attr)
-{
- unsigned off = c->attr_to_idx[attr] / 2;
- unsigned sub = c->attr_to_idx[attr] % 2;
-
- return brw_vec4_grf(vert.nr + off, sub * 4);
-}
-
-static boolean have_attr(struct brw_sf_compile *c,
- unsigned attr)
-{
- return (c->key.attrs & (1<<attr)) ? 1 : 0;
-}
-
-
-
-/***********************************************************************
- * Twoside lighting
- */
-static void copy_bfc( struct brw_sf_compile *c,
- struct brw_reg vert )
-{
- struct brw_compile *p = &c->func;
- unsigned i;
-
- for (i = 0; i < 2; i++) {
- if (have_attr(c, VERT_RESULT_COL0+i) &&
- have_attr(c, VERT_RESULT_BFC0+i))
- brw_MOV(p,
- get_vert_attr(c, vert, VERT_RESULT_COL0+i),
- get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
- }
-}
-
-
-static void do_twoside_color( struct brw_sf_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_instruction *if_insn;
- unsigned backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
-
- /* Already done in clip program:
- */
- if (c->key.primitive == SF_UNFILLED_TRIS)
- return;
-
- /* XXX: What happens if BFC isn't present? This could only happen
- * for user-supplied vertex programs, as t_vp_build.c always does
- * the right thing.
- */
- if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
- !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
- return;
-
- /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
- * to get all channels active inside the IF. In the clipping code
- * we run with NoMask, so it's not an option and we can use
- * BRW_EXECUTE_1 for all comparisions.
- */
- brw_push_insn_state(p);
- brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
- if_insn = brw_IF(p, BRW_EXECUTE_4);
- {
- switch (c->nr_verts) {
- case 3: copy_bfc(c, c->vert[2]);
- case 2: copy_bfc(c, c->vert[1]);
- case 1: copy_bfc(c, c->vert[0]);
- }
- }
- brw_ENDIF(p, if_insn);
- brw_pop_insn_state(p);
-}
-
-
-
-/***********************************************************************
- * Flat shading
- */
-
-#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
- (1<<VERT_RESULT_COL1))
-
-static void copy_colors( struct brw_sf_compile *c,
- struct brw_reg dst,
- struct brw_reg src)
-{
- struct brw_compile *p = &c->func;
- unsigned i;
-
- for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
- if (have_attr(c,i))
- brw_MOV(p,
- get_vert_attr(c, dst, i),
- get_vert_attr(c, src, i));
- }
-}
-
-
-
-/* Need to use a computed jump to copy flatshaded attributes as the
- * vertices are ordered according to y-coordinate before reaching this
- * point, so the PV could be anywhere.
- */
-static void do_flatshade_triangle( struct brw_sf_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg ip = brw_ip_reg();
- unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
- if (!nr)
- return;
-
- /* Already done in clip program:
- */
- if (c->key.primitive == SF_UNFILLED_TRIS)
- return;
-
- brw_push_insn_state(p);
-
- brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
- brw_JMPI(p, ip, ip, c->pv);
-
- copy_colors(c, c->vert[1], c->vert[0]);
- copy_colors(c, c->vert[2], c->vert[0]);
- brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));
-
- copy_colors(c, c->vert[0], c->vert[1]);
- copy_colors(c, c->vert[2], c->vert[1]);
- brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));
-
- copy_colors(c, c->vert[0], c->vert[2]);
- copy_colors(c, c->vert[1], c->vert[2]);
-
- brw_pop_insn_state(p);
-}
-
-
-static void do_flatshade_line( struct brw_sf_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg ip = brw_ip_reg();
- unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
-
- if (!nr)
- return;
-
- /* Already done in clip program:
- */
- if (c->key.primitive == SF_UNFILLED_TRIS)
- return;
-
- brw_push_insn_state(p);
-
- brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1));
- brw_JMPI(p, ip, ip, c->pv);
- copy_colors(c, c->vert[1], c->vert[0]);
-
- brw_JMPI(p, ip, ip, brw_imm_ud(nr));
- copy_colors(c, c->vert[0], c->vert[1]);
-
- brw_pop_insn_state(p);
-}
-
/***********************************************************************
@@ -277,9 +112,6 @@ static void copy_z_inv_w( struct brw_sf_compile *c )
static void invert_det( struct brw_sf_compile *c)
{
- /* Looks like we invert all 8 elements just to get 1/det in
- * position 2 !?!
- */
brw_math(&c->func,
c->inv_det,
BRW_MATH_FUNCTION_INV,
@@ -302,22 +134,16 @@ static boolean calculate_masks( struct brw_sf_compile *c,
ushort *pc_linear)
{
boolean is_last_attr = (reg == c->nr_setup_regs - 1);
- unsigned persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS;
- unsigned linear_mask;
- if (c->key.do_flat_shading)
- linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
- else
- linear_mask = c->key.attrs;
*pc_persp = 0;
*pc_linear = 0;
*pc = 0xf;
- if (persp_mask & (1 << c->idx_to_attr[reg*2]))
- *pc_persp = 0xf;
+// if (persp_mask & (1 << c->idx_to_attr[reg*2]))
+// *pc_persp = 0xf;
- if (linear_mask & (1 << c->idx_to_attr[reg*2]))
+// if (linear_mask & (1 << c->idx_to_attr[reg*2]))
*pc_linear = 0xf;
/* Maybe only processs one attribute on the final round:
@@ -325,10 +151,10 @@ static boolean calculate_masks( struct brw_sf_compile *c,
if (reg*2+1 < c->nr_setup_attrs) {
*pc |= 0xf0;
- if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
- *pc_persp |= 0xf0;
+// if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
+// *pc_persp |= 0xf0;
- if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
+// if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
*pc_linear |= 0xf0;
}
@@ -347,12 +173,6 @@ void brw_emit_tri_setup( struct brw_sf_compile *c )
invert_det(c);
copy_z_inv_w(c);
- if (c->key.do_twoside_color)
- do_twoside_color(c);
-
- if (c->key.do_flat_shading)
- do_flatshade_triangle(c);
-
for (i = 0; i < c->nr_setup_regs; i++)
{
@@ -433,9 +253,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c )
invert_det(c);
copy_z_inv_w(c);
- if (c->key.do_flat_shading)
- do_flatshade_line(c);
-
for (i = 0; i < c->nr_setup_regs; i++)
{
/* Pair of incoming attributes:
@@ -491,86 +308,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c )
}
}
-void brw_emit_point_sprite_setup( struct brw_sf_compile *c )
-{
- struct brw_compile *p = &c->func;
- unsigned i;
-
- c->nr_verts = 1;
- alloc_regs(c);
- copy_z_inv_w(c);
- for (i = 0; i < c->nr_setup_regs; i++)
- {
- struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
- struct brw_reg a0 = offset(c->vert[0], i);
- ushort pc, pc_persp, pc_linear;
- boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
-
- if (pc_persp)
- {
- if (!tex->CoordReplace) {
- brw_set_predicate_control_flag_value(p, pc_persp);
- brw_MUL(p, a0, a0, c->inv_w[0]);
- }
- }
-
- if (tex->CoordReplace) {
- /* Caculate 1.0/PointWidth */
- brw_math(&c->func,
- c->tmp,
- BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 0,
- c->dx0,
- BRW_MATH_DATA_SCALAR,
- BRW_MATH_PRECISION_FULL);
-
- if (c->key.SpriteOrigin == GL_UPPER_LEFT) {
- brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
- brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
- brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
- brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
- } else {
- brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
- brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
- brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
- brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
- }
- } else {
- brw_MOV(p, c->m1Cx, brw_imm_ud(0));
- brw_MOV(p, c->m2Cy, brw_imm_ud(0));
- }
-
- {
- brw_set_predicate_control_flag_value(p, pc);
- if (tex->CoordReplace) {
- if (c->key.SpriteOrigin == GL_UPPER_LEFT) {
- brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
- brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
- }
- else
- brw_MOV(p, c->m3C0, brw_imm_f(0.0));
- } else {
- brw_MOV(p, c->m3C0, a0); /* constant value */
- }
-
- /* Copy m0..m3 to URB.
- */
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0),
- 0, /* allocate */
- 1, /* used */
- 4, /* msg len */
- 0, /* response len */
- last, /* eot */
- last, /* writes complete */
- i*4, /* urb destination offset */
- BRW_URB_SWIZZLE_TRANSPOSE);
- }
- }
-}
/* Points setup - several simplifications as all attributes are
* constant across the face of the point (point sprites excluded!)
@@ -629,68 +366,3 @@ void brw_emit_point_setup( struct brw_sf_compile *c )
}
}
}
-
-void brw_emit_anyprim_setup( struct brw_sf_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg ip = brw_ip_reg();
- struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
- struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
- struct brw_reg primmask;
- struct brw_instruction *jmp;
- struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
-
- alloc_regs(c);
-
- primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
-
- brw_MOV(p, primmask, brw_imm_ud(1));
- brw_SHL(p, primmask, primmask, payload_prim);
-
- brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
- brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
- (1<<_3DPRIM_TRISTRIP) |
- (1<<_3DPRIM_TRIFAN) |
- (1<<_3DPRIM_TRISTRIP_REVERSE) |
- (1<<_3DPRIM_POLYGON) |
- (1<<_3DPRIM_RECTLIST) |
- (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
- {
- brw_push_insn_state(p);
- brw_emit_tri_setup( c );
- brw_pop_insn_state(p);
- /* note - thread killed in subroutine */
- }
- brw_land_fwd_jump(p, jmp);
-
- brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
- brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
- (1<<_3DPRIM_LINESTRIP) |
- (1<<_3DPRIM_LINELOOP) |
- (1<<_3DPRIM_LINESTRIP_CONT) |
- (1<<_3DPRIM_LINESTRIP_BF) |
- (1<<_3DPRIM_LINESTRIP_CONT_BF)));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
- {
- brw_push_insn_state(p);
- brw_emit_line_setup( c );
- brw_pop_insn_state(p);
- /* note - thread killed in subroutine */
- }
- brw_land_fwd_jump(p, jmp);
-
- brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
- brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
- {
- brw_push_insn_state(p);
- brw_emit_point_sprite_setup( c );
- brw_pop_insn_state(p);
- }
- brw_land_fwd_jump(p, jmp);
-
- brw_emit_point_setup( c );
-}
-
-#endif
diff --git a/src/mesa/pipe/i965simple/brw_sf_state.c b/src/mesa/pipe/i965simple/brw_sf_state.c
index 7b6ee215eb..0de6e7240e 100644
--- a/src/mesa/pipe/i965simple/brw_sf_state.c
+++ b/src/mesa/pipe/i965simple/brw_sf_state.c
@@ -34,91 +34,41 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
+#include "pipe/p_util.h"
-#if 0
static void upload_sf_vp(struct brw_context *brw)
{
- GLcontext *ctx = &brw->intel.ctx;
struct brw_sf_viewport sfv;
- struct intel_renderbuffer *irb =
- intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0][0]);
- float y_scale, y_bias;
- int x, y, w, h, x1, x2, y1, y2;
- int draw_h = ctx->DrawBuffer->Height;
memset(&sfv, 0, sizeof(sfv));
- if (ctx->DrawBuffer->Name) {
- /* User-created FBO */
- if (irb && !irb->RenderToTexture) {
- y_scale = -1.0;
- y_bias = draw_h;
- } else {
- y_scale = 1.0;
- y_bias = 0;
- }
- } else {
- if (brw->intel.driDrawable != NULL) {
- y_scale = -1.0;
- y_bias = draw_h;
- } else {
- y_scale = 1.0;
- y_bias = 0;
- }
- }
- /* _NEW_VIEWPORT, BRW_NEW_METAOPS */
-
- if (!brw->metaops.active) {
- const float *v = brw->intel.ctx.Viewport._WindowMap.m;
-
- sfv.viewport.m00 = v[MAT_SX];
- sfv.viewport.m11 = v[MAT_SY] * y_scale;
- sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale;
- sfv.viewport.m30 = v[MAT_TX];
- sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
- sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale;
- } else {
- sfv.viewport.m00 = 1;
- sfv.viewport.m11 = - 1;
- sfv.viewport.m22 = 1;
- sfv.viewport.m30 = 0;
- sfv.viewport.m31 = brw->intel.driDrawable->h;
- sfv.viewport.m32 = 0;
- }
+ /* BRW_NEW_VIEWPORT */
+ {
+ const float *scale = brw->attribs.Viewport.scale;
+ const float *trans = brw->attribs.Viewport.translate;
- /* _NEW_SCISSOR */
- x = brw->attribs.Scissor->X;
- y = brw->attribs.Scissor->Y;
- w = brw->attribs.Scissor->Width;
- h = brw->attribs.Scissor->Height;
-
- if (ctx->DrawBuffer->Name == 0) {
- x1 = x;
- y1 = draw_h - (y + h);
- x2 = x + w - 1;
- y2 = y1 + h - 1;
- } else {
- /* FBO has non-inverted coords. */
- x1 = x;
- y1 = y;
- x2 = x + w - 1;
- y2 = y + h - 1;
+ sfv.viewport.m00 = scale[0];
+ sfv.viewport.m11 = scale[1];
+ sfv.viewport.m22 = scale[2];
+ sfv.viewport.m30 = trans[0];
+ sfv.viewport.m31 = trans[1];
+ sfv.viewport.m32 = trans[2];
}
- sfv.scissor.xmin = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1);
- sfv.scissor.xmax = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1);
- sfv.scissor.ymin = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1);
- sfv.scissor.ymax = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1);
+ /* BRW_NEW_SCISSOR */
+ sfv.scissor.xmin = brw->attribs.Scissor.minx;
+ sfv.scissor.xmax = brw->attribs.Scissor.maxx;
+ sfv.scissor.ymin = brw->attribs.Scissor.miny;
+ sfv.scissor.ymax = brw->attribs.Scissor.maxy;
brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv );
}
const struct brw_tracked_state brw_sf_vp = {
.dirty = {
- .mesa = (_NEW_VIEWPORT |
- _NEW_SCISSOR),
- .brw = BRW_NEW_METAOPS,
+ .brw = (BRW_NEW_SCISSOR |
+ BRW_NEW_VIEWPORT),
.cache = 0
},
.update = upload_sf_vp
@@ -130,7 +80,7 @@ static void upload_sf_unit( struct brw_context *brw )
memset(&sf, 0, sizeof(sf));
/* CACHE_NEW_SF_PROG */
- sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
+ sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1;
sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6;
sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
@@ -151,19 +101,19 @@ static void upload_sf_unit( struct brw_context *brw )
/* CACHE_NEW_SF_VP */
sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5;
-
sf.sf5.viewport_transform = 1;
- /* _NEW_SCISSOR */
- if (brw->attribs.Scissor->Enabled)
+ /* BRW_NEW_RASTER */
+ if (brw->attribs.Raster->scissor)
sf.sf6.scissor = 1;
- /* _NEW_POLYGON */
+#if 0
if (brw->attribs.Polygon->FrontFace == GL_CCW)
sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
else
sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
if (brw->attribs.Polygon->CullFlag) {
switch (brw->attribs.Polygon->CullFaceMode) {
case GL_FRONT:
@@ -182,25 +132,24 @@ static void upload_sf_unit( struct brw_context *brw )
}
else
sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+#else
+ sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+ sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+#endif
-
- /* _NEW_LINE */
- /* XXX use ctx->Const.Min/MaxLineWidth here */
- sf.sf6.line_width = CLAMP(brw->attribs.Line->Width, 1.0, 5.0) * (1<<1);
+ sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1);
sf.sf6.line_endcap_aa_region_width = 1;
- if (brw->attribs.Line->SmoothFlag)
+ if (brw->attribs.Raster->line_smooth)
sf.sf6.aa_enable = 1;
else if (sf.sf6.line_width <= 0x2)
sf.sf6.line_width = 0;
- /* _NEW_POINT */
sf.sf6.point_rast_rule = 1; /* opengl conventions */
- /* XXX clamp max depends on AA vs. non-AA */
- sf.sf7.sprite_point = brw->attribs.Point->PointSprite;
- sf.sf7.point_size = CLAMP(brw->attribs.Point->Size, 1.0, 255.0) * (1<<3);
- sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated;
+ sf.sf7.sprite_point = brw->attribs.Raster->point_sprite;
+ sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3);
+ sf.sf7.use_point_size_state = brw->attribs.Raster->point_size_per_vertex;
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
*/
@@ -220,12 +169,8 @@ static void upload_sf_unit( struct brw_context *brw )
const struct brw_tracked_state brw_sf_unit = {
.dirty = {
- .mesa = (_NEW_POLYGON |
- _NEW_LINE |
- _NEW_POINT |
- _NEW_SCISSOR),
- .brw = (BRW_NEW_URB_FENCE |
- BRW_NEW_METAOPS),
+ .brw = (BRW_NEW_RASTERIZER |
+ BRW_NEW_URB_FENCE),
.cache = (CACHE_NEW_SF_VP |
CACHE_NEW_SF_PROG)
},
@@ -233,4 +178,3 @@ const struct brw_tracked_state brw_sf_unit = {
};
-#endif
diff --git a/src/mesa/pipe/i965simple/brw_shader_info.c b/src/mesa/pipe/i965simple/brw_shader_info.c
new file mode 100644
index 0000000000..431b45466a
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_shader_info.c
@@ -0,0 +1,49 @@
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
+
+ /* Walk the declaration tokens at the head of a TGSI token stream and, for */
+ /* each declared register file, raise info->nr_regs[file] to at least the  */
+ /* declaration's DeclarationRange.Last + 1.                                 */
+ void brw_shader_info(const struct tgsi_token *tokens,
+ struct brw_shader_info *info )
+ {
+ struct tgsi_parse_context parse;
+ int done = 0;
+
+ tgsi_parse_init( &parse, tokens );
+
+ while( !done &&
+ !tgsi_parse_end_of_tokens( &parse ) )
+ {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
+ unsigned last = decl->u.DeclarationRange.Last;
+
+ assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
+
+ // Broken by crazy wpos init:
+ //assert( info->nr_regs[decl->Declaration.File] <= last);
+ /* nr_regs is only ever raised here, never reset; NOTE(review): presumably the caller zeroes *info first - confirm. */
+ info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File],
+ last+1);
+ break;
+ }
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ default: /* any non-declaration token ends the scan */
+ done = 1;
+ break;
+ }
+ }
+
+ tgsi_parse_free (&parse);
+
+ }
diff --git a/src/mesa/pipe/i965simple/brw_state.c b/src/mesa/pipe/i965simple/brw_state.c
index ff4ae7999b..26450ae597 100644
--- a/src/mesa/pipe/i965simple/brw_state.c
+++ b/src/mesa/pipe/i965simple/brw_state.c
@@ -198,6 +198,13 @@ static void * brw_create_fs_state(struct pipe_context *pipe,
/* XXX: Do I have to duplicate the tokens as well??
*/
brw_fp->program = *shader;
+ brw_fp->id = brw_context(pipe)->program_id++;
+
+ brw_shader_info(shader->tokens,
+ &brw_fp->info);
+
+ tgsi_dump(shader->tokens, 0);
+
return (void *)brw_fp;
}
@@ -228,6 +235,9 @@ static void *brw_create_vs_state(struct pipe_context *pipe,
/* XXX: Do I have to duplicate the tokens as well??
*/
brw_vp->program = *shader;
+ brw_vp->id = brw_context(pipe)->program_id++;
+ brw_shader_info(shader->tokens,
+ &brw_vp->info);
tgsi_dump(shader->tokens, 0);
@@ -273,14 +283,11 @@ static void brw_set_viewport_state( struct pipe_context *pipe,
static void brw_set_vertex_buffer( struct pipe_context *pipe,
- unsigned index,
- const struct pipe_vertex_buffer *buffer )
+ unsigned index,
+ const struct pipe_vertex_buffer *buffer )
{
struct brw_context *brw = brw_context(pipe);
- brw->vb.vbo_array[index] = *buffer;
- if (index > brw->vb.last_vb)
- brw->vb.last_vb = index;
- assert(brw->vb.last_vb < BRW_VEP_MAX);
+ brw->vb.vbo_array[index] = buffer;
}
static void brw_set_vertex_element(struct pipe_context *pipe,
diff --git a/src/mesa/pipe/i965simple/brw_state.h b/src/mesa/pipe/i965simple/brw_state.h
index 4dabfe8082..d09711f6f0 100644
--- a/src/mesa/pipe/i965simple/brw_state.h
+++ b/src/mesa/pipe/i965simple/brw_state.h
@@ -154,4 +154,11 @@ void brw_upload_clip_prog(struct brw_context *brw);
void brw_upload_blend_constant_color(struct brw_context *brw);
void brw_upload_wm_samplers(struct brw_context *brw);
+/* brw_shader_info.c
+ */
+
+void brw_shader_info(const struct tgsi_token *tokens,
+ struct brw_shader_info *info );
+
+
#endif
diff --git a/src/mesa/pipe/i965simple/brw_state_cache.c b/src/mesa/pipe/i965simple/brw_state_cache.c
index 13e262d2e5..c5738733f4 100644
--- a/src/mesa/pipe/i965simple/brw_state_cache.c
+++ b/src/mesa/pipe/i965simple/brw_state_cache.c
@@ -178,8 +178,9 @@ unsigned brw_upload_cache( struct brw_cache *cache,
if (BRW_DEBUG & DEBUG_STATE)
printf("upload %s: %d bytes to pool buffer %p offset %x\n",
- cache->name, data_size,
- cache->pool->buffer,
+ cache->name,
+ data_size,
+ (void*)cache->pool->buffer,
offset);
/* Copy data to the buffer:
diff --git a/src/mesa/pipe/i965simple/brw_state_pool.c b/src/mesa/pipe/i965simple/brw_state_pool.c
index a490049024..78268ed8f2 100644
--- a/src/mesa/pipe/i965simple/brw_state_pool.c
+++ b/src/mesa/pipe/i965simple/brw_state_pool.c
@@ -43,17 +43,18 @@
*/
#include "pipe/p_winsys.h"
+#include "pipe/p_util.h"
#include "brw_context.h"
#include "brw_state.h"
boolean brw_pool_alloc( struct brw_mem_pool *pool,
unsigned size,
- unsigned align,
+ unsigned alignment,
unsigned *offset_return)
{
- unsigned fixup = ALIGN(pool->offset, align) - pool->offset;
+ unsigned fixup = align(pool->offset, alignment) - pool->offset;
- size = ALIGN(size, 4);
+ size = align(size, 4);
if (pool->offset + fixup + size >= pool->size) {
printf("%s failed\n", __FUNCTION__);
@@ -114,7 +115,7 @@ void brw_pool_check_wrap( struct brw_context *brw,
struct brw_mem_pool *pool )
{
if (pool->offset > (pool->size * 3) / 4) {
- brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+ brw->state.dirty.brw |= BRW_NEW_SCENE;
}
}
diff --git a/src/mesa/pipe/i965simple/brw_state_upload.c b/src/mesa/pipe/i965simple/brw_state_upload.c
index 1ca7484958..1fb480172d 100644
--- a/src/mesa/pipe/i965simple/brw_state_upload.c
+++ b/src/mesa/pipe/i965simple/brw_state_upload.c
@@ -97,39 +97,16 @@ const struct brw_tracked_state *atoms[] =
void brw_init_state( struct brw_context *brw )
{
- unsigned i;
-
brw_init_pools(brw);
brw_init_caches(brw);
- brw->state.atoms = MALLOC(sizeof(atoms));
- brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
- memcpy(brw->state.atoms, atoms, sizeof(atoms));
-
- /* Patch in a pointer to the dynamic state atom:
- */
- for (i = 0; i < brw->state.nr_atoms; i++)
- if (brw->state.atoms[i] == NULL)
- brw->state.atoms[i] = &brw->curbe.tracked_state;
-
- memcpy(&brw->curbe.tracked_state,
- &brw_constant_buffer,
- sizeof(brw_constant_buffer));
-
brw->state.dirty.brw = ~0;
brw->emit_state_always = 0;
-
-
}
void brw_destroy_state( struct brw_context *brw )
{
- if (brw->state.atoms) {
- FREE(brw->state.atoms);
- brw->state.atoms = NULL;
- }
-
brw_destroy_caches(brw);
brw_destroy_batch_cache(brw);
brw_destroy_pools(brw);
@@ -177,7 +154,7 @@ void brw_validate_state( struct brw_context *brw )
state->brw == 0)
return;
- if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
+ if (brw->state.dirty.brw & BRW_NEW_SCENE)
brw_clear_batch_cache_flush(brw);
if (BRW_DEBUG) {
@@ -189,21 +166,17 @@ void brw_validate_state( struct brw_context *brw )
memset(&examined, 0, sizeof(examined));
prev = *state;
- for (i = 0; i < brw->state.nr_atoms; i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
struct brw_state_flags generated;
assert(atom->dirty.brw ||
atom->dirty.cache);
assert(atom->update);
- if (check_state(state, &atom->dirty) || atom->always_update) {
+ if (check_state(state, &atom->dirty)) {
atom->update( brw );
-
-/* emit_foo(brw); */
}
- if (atom->emit_reloc != NULL)
- atom->emit_reloc(brw);
accumulate_state(&examined, &atom->dirty);
@@ -218,12 +191,10 @@ void brw_validate_state( struct brw_context *brw )
}
else {
for (i = 0; i < Elements(atoms); i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ const struct brw_tracked_state *atom = atoms[i];
- if (check_state(state, &atom->dirty) || atom->always_update)
+ if (check_state(state, &atom->dirty))
atom->update( brw );
- if (atom->emit_reloc != NULL)
- atom->emit_reloc(brw);
}
}
diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.c b/src/mesa/pipe/i965simple/brw_tex_layout.c
index b9514be0c2..7d6e2851b1 100644
--- a/src/mesa/pipe/i965simple/brw_tex_layout.c
+++ b/src/mesa/pipe/i965simple/brw_tex_layout.c
@@ -149,10 +149,10 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
unsigned mip1_width;
if (pt->compressed) {
- mip1_width = ALIGN(minify(pt->width[0]), align_w)
- + ALIGN(minify(minify(pt->width[0])), align_w);
+ mip1_width = align(minify(pt->width[0]), align_w)
+ + align(minify(minify(pt->width[0])), align_w);
} else {
- mip1_width = ALIGN(minify(pt->width[0]), align_w)
+ mip1_width = align(minify(pt->width[0]), align_w)
+ minify(minify(pt->width[0]));
}
@@ -164,7 +164,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
/* Pitch must be a whole number of dwords, even though we
* express it in texels.
*/
- tex->pitch = ALIGN(tex->pitch * pt->cpp, 4) / pt->cpp;
+ tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp;
tex->total_height = 0;
for ( level = pt->first_level ; level <= pt->last_level ; level++ ) {
@@ -176,7 +176,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
if (pt->compressed)
img_height = MAX2(1, height/4);
else
- img_height = ALIGN(height, align_h);
+ img_height = align(height, align_h);
/* Because the images are packed better, the final offset
@@ -187,7 +187,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
/* Layout_below: step right after second mipmap.
*/
if (level == pt->first_level + 1) {
- x += ALIGN(width, align_w);
+ x += align(width, align_w);
}
else {
y += img_height;
@@ -221,13 +221,13 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
#if 0
if (pt->compressed) {
align_w = intel_compressed_alignment(pt->internal_format);
- pt->pitch = ALIGN(width, align_w);
+ pt->pitch = align(width, align_w);
pack_y_pitch = (height + 3) / 4;
} else
#endif
{
- tex->pitch = ALIGN(pt->width[0] * pt->cpp, 4) / pt->cpp;
- pack_y_pitch = ALIGN(pt->height[0], align_h);
+ tex->pitch = align(pt->width[0] * pt->cpp, 4) / pt->cpp;
+ pack_y_pitch = align(pt->height[0], align_h);
}
pack_x_pitch = tex->pitch;
@@ -262,8 +262,8 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
if (pt->compressed) {
pack_y_pitch = (height + 3) / 4;
- if (pack_x_pitch > ALIGN(width, align_w)) {
- pack_x_pitch = ALIGN(width, align_w);
+ if (pack_x_pitch > align(width, align_w)) {
+ pack_x_pitch = align(width, align_w);
pack_x_nr <<= 1;
}
} else {
@@ -275,7 +275,7 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
if (pack_y_pitch > 2) {
pack_y_pitch >>= 1;
- pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+ pack_y_pitch = align(pack_y_pitch, align_h);
}
}
@@ -305,8 +305,6 @@ brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
sizeof(struct brw_texture));
if (tex) {
- struct brw_context *brw = brw_context(pipe);
-
memset(&tex->base + 1, 0,
sizeof(struct brw_texture) - sizeof(struct pipe_texture));
diff --git a/src/mesa/pipe/i965simple/brw_vs.c b/src/mesa/pipe/i965simple/brw_vs.c
index 33c6624214..738c6346d5 100644
--- a/src/mesa/pipe/i965simple/brw_vs.c
+++ b/src/mesa/pipe/i965simple/brw_vs.c
@@ -97,13 +97,6 @@ static void brw_upload_vs_prog( struct brw_context *brw )
key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL ||
brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL);
-#if 0
- /* BRW_NEW_METAOPS
- */
- if (brw->metaops.active)
- key.know_w_is_one = 1;
-#endif
-
/* Make an early check for the key.
*/
if (brw_search_cache(&brw->cache[BRW_VS_PROG],
@@ -120,9 +113,6 @@ static void brw_upload_vs_prog( struct brw_context *brw )
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
-#if 0
- .brw = BRW_NEW_VS | BRW_NEW_METAOPS,
-#endif
.brw = BRW_NEW_VS,
.cache = 0
},
diff --git a/src/mesa/pipe/i965simple/brw_vs_constval.c b/src/mesa/pipe/i965simple/brw_vs_constval.c
deleted file mode 100644
index de43e72c1d..0000000000
--- a/src/mesa/pipe/i965simple/brw_vs_constval.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "brw_context.h"
-#include "brw_vs.h"
-
-#if 0
-/* Component is active if it may diverge from [0,0,0,1]. Undef values
- * are promoted to [0,0,0,1] for the purposes of this analysis.
- */
-struct tracker {
- boolean twoside;
- ubyte active[PROGRAM_OUTPUT+1][128];
- unsigned size_masks[4];
-};
-
-
-static void set_active_component( struct tracker *t,
- unsigned file,
- unsigned index,
- ubyte active )
-{
- switch (file) {
- case PROGRAM_TEMPORARY:
- case PROGRAM_INPUT:
- case PROGRAM_OUTPUT:
- t->active[file][index] |= active;
-
- default:
- break;
- }
-}
-
-static void set_active( struct tracker *t,
- struct prog_dst_register dst,
- unsigned active )
-{
- set_active_component( t, dst.File, dst.Index, active & dst.WriteMask );
-}
-
-
-static ubyte get_active_component( struct tracker *t,
- unsigned file,
- unsigned index,
- unsigned component,
- ubyte swz )
-{
- switch (swz) {
- case SWIZZLE_ZERO:
- return component < 3 ? 0 : (1<<component);
- case SWIZZLE_ONE:
- return component == 3 ? 0 : (1<<component);
- default:
- switch (file) {
- case PROGRAM_TEMPORARY:
- case PROGRAM_INPUT:
- case PROGRAM_OUTPUT:
- return t->active[file][index] & (1<<component);
- default:
- return 1 << component;
- }
- }
-}
-
-
-static ubyte get_active( struct tracker *t,
- struct prog_src_register src )
-{
- unsigned i;
- ubyte active = src.NegateBase; /* NOTE! */
-
- if (src.RelAddr)
- return 0xf;
-
- for (i = 0; i < 4; i++)
- active |= get_active_component(t, src.File, src.Index, i,
- GET_SWZ(src.Swizzle, i));
-
- return active;
-}
-
-static ubyte get_output_size( struct tracker *t,
- unsigned idx )
-{
- ubyte active = t->active[PROGRAM_OUTPUT][idx];
- if (active & (1<<3)) return 4;
- if (active & (1<<2)) return 3;
- if (active & (1<<1)) return 2;
- if (active & (1<<0)) return 1;
- return 0;
-}
-
-/* Note the potential copying that occurs in the setup program:
- */
-static void calc_sizes( struct tracker *t )
-{
- unsigned i;
-
- if (t->twoside) {
- t->active[PROGRAM_OUTPUT][VERT_RESULT_COL0] |=
- t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC0];
-
- t->active[PROGRAM_OUTPUT][VERT_RESULT_COL1] |=
- t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC1];
- }
-
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- switch (get_output_size(t, i)) {
- case 4: t->size_masks[4-1] |= 1<<i;
- case 3: t->size_masks[3-1] |= 1<<i;
- case 2: t->size_masks[2-1] |= 1<<i;
- case 1: t->size_masks[1-1] |= 1<<i;
- break;
- }
- }
-}
-
-static ubyte szflag[4+1] = {
- 0,
- 0x1,
- 0x3,
- 0x7,
- 0xf
-};
-
-/* Pull a size out of the packed array:
- */
-static unsigned get_input_size(struct brw_context *brw,
- unsigned attr)
-{
- unsigned sizes_dword = brw->vb.info.sizes[attr/16];
- unsigned sizes_bits = (sizes_dword>>((attr%16)*2)) & 0x3;
- return sizes_bits + 1;
-/* return brw->vb.inputs[attr].glarray->Size; */
-}
-
-/* Calculate sizes of vertex program outputs. Size is the largest
- * component index which might vary from [0,0,0,1]
- */
-static void calc_wm_input_sizes( struct brw_context *brw )
-{
- /* BRW_NEW_VERTEX_PROGRAM */
- struct brw_vertex_program *vp =
- (struct brw_vertex_program *)brw->vertex_program;
- /* BRW_NEW_INPUT_DIMENSIONS */
- struct tracker t;
- unsigned insn;
- unsigned i;
-
- memset(&t, 0, sizeof(t));
-
- /* _NEW_LIGHT */
- if (brw->attribs.Light->Model.TwoSide)
- t.twoside = 1;
-
- for (i = 0; i < PIPE_ATTRIB_MAX; i++)
- if (vp->program.Base.InputsRead & (1<<i))
- set_active_component(&t, PROGRAM_INPUT, i,
- szflag[get_input_size(brw, i)]);
-
- for (insn = 0; insn < vp->program.Base.NumInstructions; insn++) {
- struct prog_instruction *inst = &vp->program.Base.Instructions[insn];
-
- switch (inst->Opcode) {
- case OPCODE_ARL:
- break;
-
- case OPCODE_MOV:
- set_active(&t, inst->DstReg, get_active(&t, inst->SrcReg[0]));
- break;
-
- default:
- set_active(&t, inst->DstReg, 0xf);
- break;
- }
- }
-
- calc_sizes(&t);
-
- if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) != 0) {
- memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks));
- brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS;
- }
-}
-
-const struct brw_tracked_state brw_wm_input_sizes = {
- .dirty = {
- .mesa = _NEW_LIGHT,
- .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
- .cache = 0
- },
- .update = calc_wm_input_sizes
-};
-#endif
diff --git a/src/mesa/pipe/i965simple/brw_vs_emit.c b/src/mesa/pipe/i965simple/brw_vs_emit.c
index 59459d4200..530e17a736 100644
--- a/src/mesa/pipe/i965simple/brw_vs_emit.c
+++ b/src/mesa/pipe/i965simple/brw_vs_emit.c
@@ -103,28 +103,26 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c,
c->first_output = reg;
mrf = 4;
for (i = 0; i < c->vp->program.num_outputs; i++) {
- if (c->prog_data.outputs_written & (1<<i)) {
- c->nr_outputs++;
+ c->nr_outputs++;
#if 0
- if (i == VERT_RESULT_HPOS) {
- c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- }
- else if (i == VERT_RESULT_PSIZ) {
- c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- mrf++; /* just a placeholder? XXX fix later stages & remove this */
- }
- else {
- c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
- mrf++;
- }
-#else
- /* for now stuff everything in grf */
+ if (i == VERT_RESULT_HPOS) {
c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
reg++;
-#endif
}
+ else if (i == VERT_RESULT_PSIZ) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ mrf++; /* just a placeholder? XXX fix later stages & remove this */
+ }
+ else {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
+ mrf++;
+ }
+#else
+ /* for now stuff everything in grf */
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+#endif
}
/* Allocate program temporaries:
@@ -627,11 +625,9 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_INPUT:
case TGSI_FILE_OUTPUT:
+ case TGSI_FILE_CONSTANT:
assert(c->regs[file][index].nr != 0);
return c->regs[file][index];
- case TGSI_FILE_CONSTANT:
- assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
- return c->regs[TGSI_FILE_CONSTANT][index];
case TGSI_FILE_ADDRESS:
assert(index == 0);
return c->regs[file][index];
diff --git a/src/mesa/pipe/i965simple/brw_vs_state.c b/src/mesa/pipe/i965simple/brw_vs_state.c
index 7d6fb383b9..c73469929c 100644
--- a/src/mesa/pipe/i965simple/brw_vs_state.c
+++ b/src/mesa/pipe/i965simple/brw_vs_state.c
@@ -44,7 +44,7 @@ static void upload_vs_unit( struct brw_context *brw )
/* CACHE_NEW_VS_PROG */
vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6;
- vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+ vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1;
vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
vs.thread3.dispatch_grf_start_reg = 1;
diff --git a/src/mesa/pipe/i965simple/brw_vtbl.c b/src/mesa/pipe/i965simple/brw_vtbl.c
deleted file mode 100644
index 6dc3bd838b..0000000000
--- a/src/mesa/pipe/i965simple/brw_vtbl.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-
-#include "brw_draw.h"
-#include "brw_state.h"
-#include "brw_vs.h"
-#include <stdarg.h>
-
-#if 0
-/* called from intelDestroyContext()
- */
-static void brw_destroy_context( struct intel_context *intel )
-{
- GLcontext *ctx = &intel->ctx;
- struct brw_context *brw = brw_context(&intel->ctx);
-
- brw_destroy_metaops(brw);
- brw_destroy_state(brw);
- brw_draw_destroy( brw );
-
- brw_ProgramCacheDestroy( ctx );
- brw_FrameBufferTexDestroy( brw );
-}
-
-/* called from intelDrawBuffer()
- */
-static void brw_set_draw_region( struct intel_context *intel,
- struct intel_region *draw_region,
- struct intel_region *depth_region)
-{
- struct brw_context *brw = brw_context(&intel->ctx);
-
- intel_region_release(&brw->state.draw_region);
- intel_region_release(&brw->state.depth_region);
- intel_region_reference(&brw->state.draw_region, draw_region);
- intel_region_reference(&brw->state.depth_region, depth_region);
-}
-
-
-/* called from intelFlushBatchLocked
- */
-static void brw_lost_hardware( struct intel_context *intel )
-{
- struct brw_context *brw = brw_context(&intel->ctx);
-
- /* Note that we effectively lose the context after this.
- *
- * Setting this flag provokes a state buffer wrap and also flushes
- * the hardware caches.
- */
- brw->state.dirty.brw |= BRW_NEW_CONTEXT;
-
- /* Which means there shouldn't be any commands already queued:
- */
- assert(intel->batch->ptr == intel->batch->map);
-
- brw->state.dirty.mesa |= ~0;
- brw->state.dirty.brw |= ~0;
- brw->state.dirty.cache |= ~0;
-}
-
-static void brw_note_fence( struct intel_context *intel,
- unsigned fence )
-{
- brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
-}
-
-static void brw_note_unlock( struct intel_context *intel )
-{
- struct brw_context *brw = brw_context(&intel->ctx);
-
- brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]);
- brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]);
-
- brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK;
-}
-
-
-void brw_do_flush( struct brw_context *brw,
- unsigned flags )
-{
- struct brw_mi_flush flush;
- memset(&flush, 0, sizeof(flush));
- flush.opcode = CMD_MI_FLUSH;
- flush.flags = flags;
- BRW_BATCH_STRUCT(brw, &flush);
-}
-
-
-static void brw_emit_flush( struct intel_context *intel,
- unsigned unused )
-{
- brw_do_flush(brw_context(&intel->ctx),
- BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE);
-}
-
-
-/* called from intelWaitForIdle() and intelFlush()
- *
- * For now, just flush everything. Could be smarter later.
- */
-static unsigned brw_flush_cmd( void )
-{
- struct brw_mi_flush flush;
- flush.opcode = CMD_MI_FLUSH;
- flush.pad = 0;
- flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE;
- return *(unsigned *)&flush;
-}
-
-static void brw_invalidate_state( struct intel_context *intel, unsigned new_state )
-{
- /* nothing */
-}
-#endif
diff --git a/src/mesa/pipe/i965simple/brw_winsys.h b/src/mesa/pipe/i965simple/brw_winsys.h
index cc0a210a9f..49a12a1c27 100644
--- a/src/mesa/pipe/i965simple/brw_winsys.h
+++ b/src/mesa/pipe/i965simple/brw_winsys.h
@@ -95,6 +95,8 @@ enum brw_cache_id {
BRW_MAX_CACHE
};
+#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE
+
/**
* Additional winsys interface for i965simple.
*
@@ -164,6 +166,13 @@ struct brw_winsys {
unsigned data_type);
+ /* A cheat so we don't have to think about relocations in a couple
+ * of places yet:
+ */
+ unsigned (*get_buffer_offset)( struct brw_winsys *sws,
+ struct pipe_buffer_handle *buf,
+ unsigned flags );
+
};
#define BRW_BUFFER_ACCESS_WRITE 0x1
diff --git a/src/mesa/pipe/i965simple/brw_wm.c b/src/mesa/pipe/i965simple/brw_wm.c
index 65271f22fd..f0a38d384b 100644
--- a/src/mesa/pipe/i965simple/brw_wm.c
+++ b/src/mesa/pipe/i965simple/brw_wm.c
@@ -33,153 +33,34 @@
#include "brw_context.h"
#include "brw_util.h"
#include "brw_wm.h"
+#include "brw_eu.h"
#include "brw_state.h"
+#include "pipe/p_util.h"
-unsigned brw_wm_nr_args( unsigned opcode )
-{
- switch (opcode) {
-
- case WM_PIXELXY:
- case TGSI_OPCODE_ABS:
- case TGSI_OPCODE_FLR:
- case TGSI_OPCODE_FRC:
- case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_EX2:
- case TGSI_OPCODE_LG2:
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SIN:
- case TGSI_OPCODE_SCS:
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXD:
- case TGSI_OPCODE_KIL:
- case TGSI_OPCODE_LIT:
- case WM_CINTERP:
- case WM_WPOSXY:
- return 1;
-
- case TGSI_OPCODE_POW:
- case TGSI_OPCODE_SUB:
- case TGSI_OPCODE_SGE:
- case TGSI_OPCODE_SGT:
- case TGSI_OPCODE_SLE:
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_SNE:
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_MAX:
- case TGSI_OPCODE_MIN:
- case TGSI_OPCODE_MUL:
- case TGSI_OPCODE_XPD:
- case TGSI_OPCODE_DP3:
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
- case TGSI_OPCODE_DST:
- case WM_LINTERP:
- case WM_DELTAXY:
- case WM_PIXELW:
- return 2;
-
- case WM_FB_WRITE:
- case WM_PINTERP:
- case TGSI_OPCODE_MAD:
- case TGSI_OPCODE_CMP:
- case TGSI_OPCODE_LRP:
- return 3;
-
- default:
- return 0;
- }
-}
-
-
-unsigned brw_wm_is_scalar_result( unsigned opcode )
-{
- switch (opcode) {
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_EX2:
- case TGSI_OPCODE_LG2:
- case TGSI_OPCODE_POW:
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SIN:
- case TGSI_OPCODE_DP3:
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
- case TGSI_OPCODE_DST:
- return 1;
-
- default:
- return 0;
- }
-}
static void do_wm_prog( struct brw_context *brw,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
{
- struct brw_wm_compile *c;
+ struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile);
const unsigned *program;
unsigned program_size;
- c = brw->wm.compile_data;
- if (c == NULL) {
- brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
- c = brw->wm.compile_data;
- } else {
- memset(c, 0, sizeof(*brw->wm.compile_data));
- }
- memcpy(&c->key, key, sizeof(*key));
-
+ c->key = *key;
c->fp = fp;
+
+ c->delta_xy[0] = brw_null_reg();
+ c->delta_xy[1] = brw_null_reg();
+ c->pixel_xy[0] = brw_null_reg();
+ c->pixel_xy[1] = brw_null_reg();
+ c->pixel_w = brw_null_reg();
+
+
fprintf(stderr, "XXXXXXXX FP\n");
-#if 0
- c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
-
- if (brw_wm_is_glsl(&c->fp->program)) {
- brw_wm_glsl_emit(c);
- } else
- {
- /* Augment fragment program. Add instructions for pre- and
- * post-fragment-program tasks such as interpolation and fogging.
- */
- brw_wm_pass_fp(c);
-
- /* Translate to intermediate representation. Build register usage
- * chains.
- */
- brw_wm_pass0(c);
-
- /* Dead code removal.
- */
- brw_wm_pass1(c);
-
- /* Register allocation.
- */
- c->grf_limit = BRW_WM_MAX_GRF/2;
-
- /* This is where we start emitting gen4 code:
- */
- brw_init_compile(&c->func);
-
- brw_wm_pass2(c);
-
- c->prog_data.total_grf = c->max_wm_grf;
- if (c->last_scratch) {
- c->prog_data.total_scratch =
- c->last_scratch + 0x40;
- } else {
- c->prog_data.total_scratch = 0;
- }
-
- /* Emit GEN4 code.
- */
- brw_wm_emit(c);
- }
+ brw_wm_glsl_emit(c);
+
/* get the program
*/
program = brw_get_program(&c->func, &program_size);
@@ -193,7 +74,8 @@ static void do_wm_prog( struct brw_context *brw,
program_size,
&c->prog_data,
&brw->wm.prog_data );
-#endif
+
+ FREE(c);
}
@@ -206,8 +88,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
(struct brw_fragment_program *)brw->attribs.FragmentProgram;
unsigned lookup = 0;
unsigned line_aa;
- unsigned i;
-
+
memset(key, 0, sizeof(*key));
/* Build the index for table lookup
@@ -274,14 +155,10 @@ static void brw_wm_populate_key( struct brw_context *brw,
#if 0
- /* BRW_NEW_WM_INPUT_DIMENSIONS */
- key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS);
-#endif
-
- /* _NEW_LIGHT */
- key->flat_shade = (brw->attribs.Raster->flatshade);
-
- /* _NEW_TEXTURE */
+ /* BRW_NEW_SAMPLER
+ *
+ * Not doing any of this at the moment:
+ */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
const struct pipe_sampler_state *unit = brw->attribs.Samplers[i];
@@ -291,12 +168,11 @@ static void brw_wm_populate_key( struct brw_context *brw,
unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
key->shadowtex_mask |= 1<<i;
}
-#if 0
if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA)
key->yuvtex_mask |= 1<<i;
-#endif
}
}
+#endif
/* Extra info:
@@ -329,7 +205,6 @@ static void brw_upload_wm_prog( struct brw_context *brw )
const struct brw_tracked_state brw_wm_prog = {
.dirty = {
.brw = (BRW_NEW_FS |
- BRW_NEW_WM_INPUT_DIMENSIONS |
BRW_NEW_REDUCED_PRIMITIVE),
.cache = 0
},
diff --git a/src/mesa/pipe/i965simple/brw_wm.h b/src/mesa/pipe/i965simple/brw_wm.h
index a394e25da3..a1ac0f504a 100644
--- a/src/mesa/pipe/i965simple/brw_wm.h
+++ b/src/mesa/pipe/i965simple/brw_wm.h
@@ -60,86 +60,19 @@ struct brw_wm_prog_key {
unsigned aa_dest_stencil_reg:3;
unsigned dest_depth_reg:3;
unsigned nr_depth_regs:3;
- unsigned projtex_mask:8;
unsigned shadowtex_mask:8;
unsigned computes_depth:1; /* could be derived from program string */
unsigned source_depth_to_render_target:1;
- unsigned flat_shade:1;
unsigned runtime_check_aads_emit:1;
-
- unsigned yuvtex_mask:8;
- unsigned pad1:24;
-
- unsigned program_string_id:32;
-};
-
-
-/* A bit of a glossary:
- *
- * brw_wm_value: A computed value or program input. Values are
- * constant, they are created once and are never modified. When a
- * fragment program register is written or overwritten, new values are
- * created fresh, preserving the rule that values are constant.
- *
- * brw_wm_ref: A reference to a value. Wherever a value used is by an
- * instruction or as a program output, that is tracked with an
- * instance of this struct. All references to a value occur after it
- * is created. After the last reference, a value is dead and can be
- * discarded.
- *
- * brw_wm_grf: Represents a physical hardware register. May be either
- * empty or hold a value. Register allocation is the process of
- * assigning values to grf registers. This occurs in pass2 and the
- * brw_wm_grf struct is not used before that.
- *
- * Fragment program registers: These are time-varying constructs that
- * are hard to reason about and which we translate away in pass0. A
- * single fragment program register element (eg. temp[0].x) will be
- * translated to one or more brw_wm_value structs, one for each time
- * that temp[0].x is written to during the program.
- */
-
-
-/* Used in pass2 to track register allocation.
- */
-struct brw_wm_grf {
- struct brw_wm_value *value;
- unsigned nextuse;
-};
+ unsigned yuvtex_mask:8;
-struct brw_wm_value {
- struct brw_reg hw_reg; /* emitted to this reg, may not always be there */
- struct brw_wm_ref *lastuse;
- struct brw_wm_grf *resident;
- unsigned contributes_to_output:1;
- unsigned spill_slot:16; /* if non-zero, spill immediately after calculation */
+ unsigned program_string_id;
};
-struct brw_wm_ref {
- struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */
- struct brw_wm_value *value;
- struct brw_wm_ref *prevuse;
- unsigned unspill_reg:7; /* unspill to reg */
- unsigned emitted:1;
- unsigned insn:24;
-};
-struct brw_wm_constref {
- const struct brw_wm_ref *ref;
- float constval;
-};
-struct brw_wm_instruction {
- struct brw_wm_value *dst[4];
- struct brw_wm_ref *src[3][4];
- unsigned opcode:8;
- unsigned saturate:1;
- unsigned writemask:4;
- unsigned tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */
- unsigned tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
-};
#define PROGRAM_INTERNAL_PARAM
#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */
@@ -151,124 +84,59 @@ struct brw_wm_instruction {
#define BRW_WM_MAX_CONST 256
#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
-
-
-/* New opcodes to track internal operations required for WM unit.
- * These are added early so that the registers used can be tracked,
- * freed and reused like those of other instructions.
- */
-#define WM_PIXELXY (TGSI_OPCODE_LAST)
-#define WM_DELTAXY (TGSI_OPCODE_LAST + 1)
-#define WM_PIXELW (TGSI_OPCODE_LAST + 2)
-#define WM_LINTERP (TGSI_OPCODE_LAST + 3)
-#define WM_PINTERP (TGSI_OPCODE_LAST + 4)
-#define WM_CINTERP (TGSI_OPCODE_LAST + 5)
-#define WM_WPOSXY (TGSI_OPCODE_LAST + 6)
-#define WM_FB_WRITE (TGSI_OPCODE_LAST + 7)
-#define MAX_WM_OPCODE (TGSI_OPCODE_LAST + 8)
-
#define PAYLOAD_DEPTH (PIPE_ATTRIB_MAX)
+#define MAX_IFSN 32
+#define MAX_LOOP_DEPTH 32
+
struct brw_wm_compile {
struct brw_compile func;
struct brw_wm_prog_key key;
- struct brw_wm_prog_data prog_data;
+ struct brw_wm_prog_data prog_data; /* result */
struct brw_fragment_program *fp;
- float (*env_param)[4];
-
- enum {
- START,
- PASS2_DONE
- } state;
-
- /* Initial pass - translate fp instructions to fp instructions,
- * simplifying and adding instructions for interpolation and
- * framebuffer writes.
- */
- const struct pipe_shader_state *prog_instructions;
- unsigned nr_fp_insns;
- unsigned fp_temp;
- unsigned fp_interp_emitted;
- unsigned fp_deriv_emitted;
-
- struct tgsi_src_register pixel_xy;
- struct tgsi_src_register delta_xy;
- struct tgsi_src_register pixel_w;
-
-
- struct brw_wm_value vreg[BRW_WM_MAX_VREG];
- unsigned nr_vreg;
-
- struct brw_wm_value creg[BRW_WM_MAX_PARAM];
- unsigned nr_creg;
+ unsigned grf_limit;
+ unsigned max_wm_grf;
- struct {
- struct brw_wm_value depth[4]; /* includes r0/r1 */
- struct brw_wm_value input_interp[PIPE_ATTRIB_MAX];
- } payload;
+ struct brw_reg pixel_xy[2];
+ struct brw_reg delta_xy[2];
+ struct brw_reg pixel_w;
- const struct brw_wm_ref *pass0_fp_reg[16][256][4];
- struct brw_wm_ref undef_ref;
- struct brw_wm_value undef_value;
+ struct brw_reg wm_regs[8][32][4];
- struct brw_wm_ref refs[BRW_WM_MAX_REF];
- unsigned nr_refs;
+ struct brw_reg payload_depth[4];
+ struct brw_reg payload_coef[16];
- struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
- unsigned nr_insns;
+ struct brw_reg emit_mask_reg;
- struct brw_wm_constref constref[BRW_WM_MAX_CONST];
- unsigned nr_constrefs;
+ struct brw_instruction *if_inst[MAX_IFSN];
+ int if_insn;
- struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
+ struct brw_instruction *loop_inst[MAX_LOOP_DEPTH];
+ int loop_insn;
- unsigned grf_limit;
- unsigned max_wm_grf;
- unsigned last_scratch;
+ struct brw_instruction *inst0;
+ struct brw_instruction *inst1;
- struct {
- boolean inited;
- struct brw_reg reg;
- } wm_regs[16][256][4];
struct brw_reg stack;
- struct brw_reg emit_mask_reg;
+ struct brw_indirect stack_index;
+
unsigned reg_index;
+
+ unsigned tmp_start;
unsigned tmp_index;
};
-unsigned brw_wm_nr_args( unsigned opcode );
-unsigned brw_wm_is_scalar_result( unsigned opcode );
-
-void brw_wm_pass_fp( struct brw_wm_compile *c );
-void brw_wm_pass0( struct brw_wm_compile *c );
-void brw_wm_pass1( struct brw_wm_compile *c );
-void brw_wm_pass2( struct brw_wm_compile *c );
-void brw_wm_emit( struct brw_wm_compile *c );
-
-void brw_wm_print_value( struct brw_wm_compile *c,
- struct brw_wm_value *value );
-
-void brw_wm_print_ref( struct brw_wm_compile *c,
- struct brw_wm_ref *ref );
-
-void brw_wm_print_insn( struct brw_wm_compile *c,
- struct brw_wm_instruction *inst );
-
-void brw_wm_print_program( struct brw_wm_compile *c,
- const char *stage );
void brw_wm_lookup_iz( unsigned line_aa,
unsigned lookup,
struct brw_wm_prog_key *key );
-#if 0
-boolean brw_wm_is_glsl(struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_wm_compile *c);
-#endif
+void brw_wm_emit_decls(struct brw_wm_compile *c);
#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_decl.c b/src/mesa/pipe/i965simple/brw_wm_decl.c
new file mode 100644
index 0000000000..392f17fad6
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_decl.c
@@ -0,0 +1,377 @@
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
+
+/* Hand out the next scratch GRF for use while emitting the current
+ * instruction sequence.
+ *
+ * Temporaries are numbered relative to c->tmp_start; the first one
+ * returned after release_tmps() is register tmp_start + 1.
+ *
+ * Returns a vec8 view of the allocated GRF.
+ */
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+   unsigned nr;
+
+   c->tmp_index++;
+   nr = c->tmp_start + c->tmp_index;
+
+   /* tmp_index is only an offset from tmp_start, so the high-water
+    * mark must be derived from the absolute register number.  Keep
+    * reg_index one past the highest register handed out, which is how
+    * prealloc_reg() uses it (as the next free GRF).
+    */
+   c->reg_index = MAX2(c->reg_index, nr + 1);
+
+   return brw_vec8_grf(nr, 0);
+}
+
+/* Give back every temporary handed out by alloc_tmp(): the next
+ * alloc_tmp() call starts again from the first slot after c->tmp_start.
+ * c->reg_index is left untouched.
+ */
+static void release_tmps(struct brw_wm_compile *c)
+{
+ c->tmp_index = 0;
+}
+
+
+
+/* Returns non-zero when reg is the null register of the architecture
+ * register file, zero otherwise.
+ */
+static int is_null( struct brw_reg reg )
+{
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      return 0;
+
+   return reg.nr == BRW_ARF_NULL;
+}
+
+/* Emit, at most once, the code that fills c->pixel_xy[0] and
+ * c->pixel_xy[1].  "Not yet emitted" is signalled by pixel_xy[0] still
+ * being the null register.
+ */
+static void emit_pixel_xy( struct brw_wm_compile *c )
+{
+   struct brw_compile *p;
+   struct brw_reg origin_uw;
+
+   /* Already computed earlier in this program. */
+   if (!is_null(c->pixel_xy[0]))
+      return;
+
+   p = &c->func;
+   origin_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+
+   /* Two fresh temporaries, viewed as unsigned words. */
+   c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
+   c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
+
+   /* Calculate pixel centers by adding 1 or 0 to each of the
+    * micro-tile coordinates passed in r1.
+    */
+   brw_ADD(p,
+           c->pixel_xy[0],
+           stride(suboffset(origin_uw, 4), 2, 4, 0),
+           brw_imm_v(0x10101010));
+
+   brw_ADD(p,
+           c->pixel_xy[1],
+           stride(suboffset(origin_uw, 5), 2, 4, 0),
+           brw_imm_v(0x11001100));
+}
+
+
+
+
+
+
+/* Emit, at most once, the code that fills c->delta_xy[0] and
+ * c->delta_xy[1].  Makes sure the pixel coordinates have been emitted
+ * first.  "Not yet emitted" is signalled by delta_xy[0] still being
+ * the null register.
+ */
+static void emit_delta_xy( struct brw_wm_compile *c )
+{
+   struct brw_compile *p;
+   struct brw_reg origin;
+
+   /* Already computed earlier in this program. */
+   if (!is_null(c->delta_xy[0]))
+      return;
+
+   p = &c->func;
+   origin = brw_vec1_grf(1, 0);
+
+   /* The deltas are derived from the pixel coordinates. */
+   emit_pixel_xy(c);
+
+   c->delta_xy[0] = alloc_tmp(c);
+   c->delta_xy[1] = alloc_tmp(c);
+
+   /* Calc delta X,Y by subtracting origin in r1 from the pixel
+    * centers.
+    */
+   brw_ADD(p,
+           c->delta_xy[0],
+           retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW),
+           negate(origin));
+
+   brw_ADD(p,
+           c->delta_xy[1],
+           retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW),
+           negate(suboffset(origin,1)));
+}
+
+
+
+#if 0
+/* Disabled: compiled out by the surrounding #if 0.
+ *
+ * Would fill c->pixel_w the first time it is called, i.e. while
+ * c->pixel_w is still the null register: interpolate 1/w with a
+ * LINE/MAC pair into message register 2, then invert it with a math
+ * instruction.
+ *
+ * NOTE(review): reads c->coef_wpos -- confirm that field exists in
+ * struct brw_wm_compile before re-enabling this.
+ */
+static void emit_pixel_w( struct brw_wm_compile *c )
+{
+ if (is_null(c->pixel_w)) {
+ struct brw_compile *p = &c->func;
+
+ struct brw_reg interp_wpos = c->coef_wpos;
+
+ c->pixel_w = alloc_tmp(c);
+
+ emit_delta_xy(c);
+
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4);
+ brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]);
+
+ /* Calc w */
+ brw_math_16( p,
+ c->pixel_w,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+#endif
+
+
+/* Constant interpolation of fragment input `idx`.
+ *
+ * For every channel selected in `mask` (bit n = channel n), copy
+ * element 3 of that channel's coefficient group into the input's
+ * staging register.  The four channels' coefficient groups sit at
+ * sub-registers 0 and 4 of two consecutive GRFs starting at
+ * c->payload_coef[idx].
+ */
+static void emit_cinterp(struct brw_wm_compile *c,
+                         int idx,
+                         int mask )
+{
+   struct brw_compile *p = &c->func;
+   const struct brw_reg coef = c->payload_coef[idx];
+   struct brw_reg interp[4];
+   int chan;
+
+   /* Channels 0,1 live in coef.nr, channels 2,3 in coef.nr + 1. */
+   for (chan = 0; chan < 4; chan++)
+      interp[chan] = brw_vec1_grf(coef.nr + chan / 2, (chan % 2) * 4);
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1 << chan)))
+         continue;
+
+      brw_MOV(p,
+              c->wm_regs[TGSI_FILE_INPUT][idx][chan],
+              suboffset(interp[chan], 3));
+   }
+}
+
+/* Linear interpolation of fragment input `idx`.
+ *
+ * For every channel selected in `mask` (bit n = channel n), emit a
+ * LINE/MAC pair combining that channel's coefficients with
+ * c->delta_xy[0] and c->delta_xy[1], writing the result to the input's
+ * staging register.  The deltas are emitted first if they have not
+ * been already.
+ */
+static void emit_linterp(struct brw_wm_compile *c,
+                         int idx,
+                         int mask )
+{
+   struct brw_compile *p = &c->func;
+   const struct brw_reg coef = c->payload_coef[idx];
+   struct brw_reg interp[4];
+   int chan;
+
+   emit_delta_xy(c);
+
+   /* Channels 0,1 live in coef.nr, channels 2,3 in coef.nr + 1. */
+   for (chan = 0; chan < 4; chan++)
+      interp[chan] = brw_vec1_grf(coef.nr + chan / 2, (chan % 2) * 4);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg dst;
+
+      if (!(mask & (1 << chan)))
+         continue;
+
+      dst = c->wm_regs[TGSI_FILE_INPUT][idx][chan];
+      brw_LINE(p, brw_null_reg(), interp[chan], c->delta_xy[0]);
+      brw_MAC(p, dst, suboffset(interp[chan], 1), c->delta_xy[1]);
+   }
+}
+
+#if 0
+/* Disabled: compiled out by the surrounding #if 0.
+ *
+ * Perspective interpolation of input `idx`: the same LINE/MAC pair
+ * that emit_linterp() emits per selected channel, followed by a
+ * multiply by c->pixel_w.
+ *
+ * NOTE(review): calls get_delta_xy(), get_pixel_w() and
+ * allocate_reg(), none of which are defined in the visible part of
+ * this file -- confirm the intended helpers before re-enabling.
+ */
+static void emit_pinterp(struct brw_wm_compile *c,
+ int idx,
+ int mask )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg interp[4];
+ struct brw_reg coef = c->payload_coef[idx];
+ int i;
+
+ get_delta_xy(c);
+ get_pixel_w(c);
+
+ interp[0] = brw_vec1_grf(coef.nr, 0);
+ interp[1] = brw_vec1_grf(coef.nr, 4);
+ interp[2] = brw_vec1_grf(coef.nr+1, 0);
+ interp[3] = brw_vec1_grf(coef.nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i);
+ brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
+ brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
+ brw_MUL(p, dst, dst, c->pixel_w);
+ }
+ }
+}
+#endif
+
+
+
+#if 0
+/* Disabled: compiled out by the surrounding #if 0.
+ *
+ * Sketch of window-position handling: the .xy channels are produced
+ * by a WM_WPOSXY op fed from the pixel coordinates, the .zw channels
+ * by a WM_LINTERP op fed from the payload coefficients and the deltas.
+ *
+ * NOTE(review): the parameter list is empty, yet the body uses `c`
+ * and `idx`; it also relies on emit_op(), dst_reg(), src_reg() and
+ * the WM_* opcodes, none of which are defined in the visible part of
+ * this file.  This cannot be re-enabled as written.
+ */
+static void emit_wpos( )
+{
+ struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
+ struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+ struct tgsi_full_src_register deltas = get_delta_xy(c);
+ struct tgsi_full_src_register arg2;
+ unsigned opcode;
+
+ opcode = WM_LINTERP;
+ arg2 = src_undef();
+
+ /* Have to treat wpos.xy specially:
+ */
+ emit_op(c,
+ WM_WPOSXY,
+ dst_mask(dst, WRITEMASK_XY),
+ 0, 0, 0,
+ get_pixel_xy(c),
+ src_undef(),
+ src_undef());
+
+ dst = dst_mask(dst, WRITEMASK_ZW);
+
+ /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+ */
+ emit_op(c,
+ WM_LINTERP,
+ dst,
+ 0, 0, 0,
+ interp,
+ deltas,
+ arg2);
+}
+#endif
+
+
+
+
+/* Perform register allocation:
+ *
+ * -- r0???
+ * -- passthrough depth regs (and stencil/aa??)
+ * -- curbe ??
+ * -- inputs (coefficients)
+ *
+ * Use a totally static register allocation.  This will perform poorly
+ * but is an easy way to get started (again).
+ *
+ * On return:
+ *  - c->payload_depth[], c->payload_coef[] and the CONSTANT, INPUT and
+ *    OUTPUT rows of c->wm_regs[] name the GRFs assigned to each item;
+ *  - c->emit_mask_reg and c->stack have been assigned;
+ *  - c->prog_data.{max_const, first_curbe_grf, urb_read_length,
+ *    curb_read_length} have been filled in;
+ *  - c->tmp_start == c->reg_index == first GRF not assigned above.
+ *
+ * The destination arrays have fixed sizes while the counts come from
+ * the program key and shader info, so each count is asserted against
+ * the array it indexes before anything is written.
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+   const unsigned max_depth_regs =
+      sizeof(c->payload_depth) / sizeof(c->payload_depth[0]);
+   const unsigned max_coef_regs =
+      sizeof(c->payload_coef) / sizeof(c->payload_coef[0]);
+   const unsigned max_regs_per_file =
+      sizeof(c->wm_regs[0]) / sizeof(c->wm_regs[0][0]);
+   int i, j;
+   int nr_curbe_regs = 0;
+
+   /* R0, then some depth related regs:
+    */
+   assert(c->key.nr_depth_regs <= max_depth_regs);
+   for (i = 0; i < c->key.nr_depth_regs; i++) {
+      c->payload_depth[i] = brw_vec8_grf(i*2, 0);
+      c->reg_index += 2;
+   }
+
+
+   /* Then a copy of our part of the CURBE entry:
+    */
+   {
+      int nr_constants = c->fp->info.nr_regs[TGSI_FILE_CONSTANT];
+      int index = 0;
+
+      assert(nr_constants >= 0 &&
+             (unsigned) nr_constants <= max_regs_per_file);
+
+      c->prog_data.max_const = 4*nr_constants;
+
+      /* Constants are packed one float per GRF element, eight to a
+       * register.
+       */
+      for (i = 0; i < nr_constants; i++) {
+         for (j = 0; j < 4; j++, index++)
+            c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8,
+                                                                 index%8);
+      }
+
+      /* Rounded up to a whole number of register pairs. */
+      nr_curbe_regs = 2*((4*nr_constants+15)/16);
+      c->reg_index += nr_curbe_regs;
+   }
+
+   /* Next we receive the plane coefficients for parameter
+    * interpolation:
+    */
+   assert(c->fp->info.nr_regs[TGSI_FILE_INPUT] <= max_coef_regs);
+   assert(c->fp->info.nr_regs[TGSI_FILE_INPUT] <= max_regs_per_file);
+   for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) {
+      c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0);
+      c->reg_index += 2;
+   }
+
+   c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+   c->prog_data.urb_read_length = c->fp->program.num_inputs * 2;
+   c->prog_data.curb_read_length = nr_curbe_regs;
+
+   /* That's the end of the payload, now we can start allocating registers.
+    */
+   c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+   c->reg_index++;
+
+   c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+   c->reg_index += 2;
+
+   /* Now allocate room for the interpolated inputs and staging
+    * registers for the outputs:
+    */
+   for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++)
+      for (j = 0; j < 4; j++)
+         c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
+
+   assert(c->fp->info.nr_regs[TGSI_FILE_OUTPUT] <= max_regs_per_file);
+   for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_OUTPUT]; i++)
+      for (j = 0; j < 4; j++)
+         c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
+
+   /* Beyond this we should only need registers for internal temporaries:
+    */
+   c->tmp_start = c->reg_index;
+}
+
+
+
+
+
+/* Need to interpolate fragment program inputs in as a preamble to the
+ * shader. A more sophisticated compiler would do this on demand, but
+ * we'll do it up front:
+ */
+/* Emit the interpolation code for every register covered by a single
+ * TGSI_FILE_INPUT declaration.  The declaration's usage mask selects
+ * which channels are interpolated.
+ */
+static void emit_input_decl(struct brw_wm_compile *c,
+                            const struct tgsi_full_declaration *decl)
+{
+   const unsigned first = decl->u.DeclarationRange.First;
+   const unsigned last = decl->u.DeclarationRange.Last;
+   const unsigned mask = decl->Declaration.UsageMask; /* ? */
+   unsigned reg;
+
+   assert(decl->Declaration.Interpolate);
+
+   for (reg = first; reg <= last; reg++) {
+      switch (decl->Interpolation.Interpolate) {
+      case TGSI_INTERPOLATE_CONSTANT:
+         emit_cinterp(c, reg, mask);
+         break;
+
+      case TGSI_INTERPOLATE_LINEAR:
+         emit_linterp(c, reg, mask);
+         break;
+
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+         /* emit_pinterp() is compiled out for now, so perspective
+          * inputs take the linear path.
+          */
+         emit_linterp(c, reg, mask);
+         break;
+      }
+   }
+}
+
+/* Need to interpolate fragment program inputs in as a preamble to the
+ * shader.  A more sophisticated compiler would do this on demand, but
+ * we'll do it up front.
+ *
+ * Assigns the static register layout, then walks the leading
+ * declaration tokens of c->fp->program.tokens and emits interpolation
+ * code for each input declaration.  The walk stops at the first token
+ * that is not a declaration.  Temporaries used along the way are
+ * released before returning.
+ */
+void brw_wm_emit_decls(struct brw_wm_compile *c)
+{
+   struct tgsi_parse_context parse;
+   int finished = 0;
+
+   prealloc_reg(c);
+
+   tgsi_parse_init( &parse, c->fp->program.tokens );
+
+   while (!finished && !tgsi_parse_end_of_tokens( &parse )) {
+      tgsi_parse_token( &parse );
+
+      /* Immediates, instructions and anything else end the preamble. */
+      if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) {
+         finished = 1;
+         continue;
+      }
+
+      /* Only input declarations need interpolation code. */
+      if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)
+         emit_input_decl(c, &parse.FullToken.FullDeclaration);
+   }
+
+   tgsi_parse_free (&parse);
+
+   release_tmps(c);
+}
diff --git a/src/mesa/pipe/i965simple/brw_wm_fp.c b/src/mesa/pipe/i965simple/brw_wm_fp.c
deleted file mode 100644
index 20e90bc612..0000000000
--- a/src/mesa/pipe/i965simple/brw_wm_fp.c
+++ /dev/null
@@ -1,1007 +0,0 @@
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "brw_context.h"
-#include "brw_wm.h"
-#include "brw_util.h"
-
-
-#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
-
-#define X 0
-#define Y 1
-#define Z 2
-#define W 3
-
-
-static const char *wm_opcode_strings[] = {
- "PIXELXY",
- "DELTAXY",
- "PIXELW",
- "LINTERP",
- "PINTERP",
- "CINTERP",
- "WPOSXY",
- "FB_WRITE"
-};
-
-#if 0
-static const char *wm_file_strings[] = {
- "PAYLOAD"
-};
-#endif
-
-
-/***********************************************************************
- * Source regs
- */
-#if 0
-static struct prog_src_register src_reg(unsigned file, unsigned idx)
-{
- struct prog_src_register reg;
- reg.File = file;
- reg.Index = idx;
- reg.Swizzle = SWIZZLE_NOOP;
- reg.RelAddr = 0;
- reg.NegateBase = 0;
- reg.Abs = 0;
- reg.NegateAbs = 0;
- return reg;
-}
-
-static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
-{
- return src_reg(dst.File, dst.Index);
-}
-
-static struct prog_src_register src_undef( void )
-{
- return src_reg(PROGRAM_UNDEFINED, 0);
-}
-
-static boolean src_is_undef(struct prog_src_register src)
-{
- return src.File == PROGRAM_UNDEFINED;
-}
-
-static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
-{
- reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
- return reg;
-}
-
-static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
-{
- return src_swizzle(reg, x, x, x, x);
-}
-
-
-/***********************************************************************
- * Dest regs
- */
-
-static struct prog_dst_register dst_reg(unsigned file, unsigned idx)
-{
- struct prog_dst_register reg;
- reg.File = file;
- reg.Index = idx;
- reg.WriteMask = WRITEMASK_XYZW;
- reg.CondMask = 0;
- reg.CondSwizzle = 0;
- reg.pad = 0;
- reg.CondSrc = 0;
- return reg;
-}
-
-static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
-{
- reg.WriteMask &= mask;
- return reg;
-}
-
-static struct prog_dst_register dst_undef( void )
-{
- return dst_reg(PROGRAM_UNDEFINED, 0);
-}
-
-
-
-static struct prog_dst_register get_temp( struct brw_wm_compile *c )
-{
- int bit = ffs( ~c->fp_temp );
-
- if (!bit) {
- _mesa_printf("%s: out of temporaries\n", __FILE__);
- exit(1);
- }
-
- c->fp_temp |= 1<<(bit-1);
- return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
-}
-
-
-static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
-{
- c->fp_temp &= ~1<<(temp.Index + 1 - FIRST_INTERNAL_TEMP);
-}
-
-
-/***********************************************************************
- * Instructions
- */
-
-static const struct tgsi_token *get_fp_inst(struct brw_wm_compile *c)
-{
- return &c->prog_instructions->tokens[c->nr_fp_insns++];
-}
-
-static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
- const struct prog_instruction *inst0)
-{
- struct prog_instruction *inst = get_fp_inst(c);
- *inst = *inst0;
- inst->Data = (void *)inst0;
- return inst;
-}
-
-static struct prog_instruction * emit_op(struct brw_wm_compile *c,
- unsigned op,
- struct prog_dst_register dest,
- unsigned saturate,
- unsigned tex_src_unit,
- unsigned tex_src_target,
- struct prog_src_register src0,
- struct prog_src_register src1,
- struct prog_src_register src2 )
-{
- struct prog_instruction *inst = get_fp_inst(c);
-
- memset(inst, 0, sizeof(*inst));
-
- inst->Opcode = op;
- inst->DstReg = dest;
- inst->SaturateMode = saturate;
- inst->TexSrcUnit = tex_src_unit;
- inst->TexSrcTarget = tex_src_target;
- inst->SrcReg[0] = src0;
- inst->SrcReg[1] = src1;
- inst->SrcReg[2] = src2;
- return inst;
-}
-
-
-
-
-/***********************************************************************
- * Special instructions for interpolation and other tasks
- */
-
-static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
-{
- if (src_is_undef(c->pixel_xy)) {
- struct prog_dst_register pixel_xy = get_temp(c);
- struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
-
-
- /* Emit the out calculations, and hold onto the results. Use
- * two instructions as a temporary is required.
- */
- /* pixel_xy.xy = PIXELXY payload[0];
- */
- emit_op(c,
- WM_PIXELXY,
- dst_mask(pixel_xy, WRITEMASK_XY),
- 0, 0, 0,
- payload_r0_depth,
- src_undef(),
- src_undef());
-
- c->pixel_xy = src_reg_from_dst(pixel_xy);
- }
-
- return c->pixel_xy;
-}
-
-static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
-{
- if (src_is_undef(c->delta_xy)) {
- struct prog_dst_register delta_xy = get_temp(c);
- struct prog_src_register pixel_xy = get_pixel_xy(c);
- struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
-
- /* deltas.xy = DELTAXY pixel_xy, payload[0]
- */
- emit_op(c,
- WM_DELTAXY,
- dst_mask(delta_xy, WRITEMASK_XY),
- 0, 0, 0,
- pixel_xy,
- payload_r0_depth,
- src_undef());
-
- c->delta_xy = src_reg_from_dst(delta_xy);
- }
-
- return c->delta_xy;
-}
-
-static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
-{
- if (src_is_undef(c->pixel_w)) {
- struct prog_dst_register pixel_w = get_temp(c);
- struct prog_src_register deltas = get_delta_xy(c);
- struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
-
-
- /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
- */
- emit_op(c,
- WM_PIXELW,
- dst_mask(pixel_w, WRITEMASK_W),
- 0, 0, 0,
- interp_wpos,
- deltas,
- src_undef());
-
-
- c->pixel_w = src_reg_from_dst(pixel_w);
- }
-
- return c->pixel_w;
-}
-
-static void emit_interp( struct brw_wm_compile *c,
- unsigned idx )
-{
- struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
- struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
- struct prog_src_register deltas = get_delta_xy(c);
- struct prog_src_register arg2;
- unsigned opcode;
-
- /* Need to use PINTERP on attributes which have been
- * multiplied by 1/W in the SF program, and LINTERP on those
- * which have not:
- */
- switch (idx) {
- case FRAG_ATTRIB_WPOS:
- opcode = WM_LINTERP;
- arg2 = src_undef();
-
- /* Have to treat wpos.xy specially:
- */
- emit_op(c,
- WM_WPOSXY,
- dst_mask(dst, WRITEMASK_XY),
- 0, 0, 0,
- get_pixel_xy(c),
- src_undef(),
- src_undef());
-
- dst = dst_mask(dst, WRITEMASK_ZW);
-
- /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
- */
- emit_op(c,
- WM_LINTERP,
- dst,
- 0, 0, 0,
- interp,
- deltas,
- arg2);
- break;
- case FRAG_ATTRIB_COL0:
- case FRAG_ATTRIB_COL1:
- if (c->key.flat_shade) {
- emit_op(c,
- WM_CINTERP,
- dst,
- 0, 0, 0,
- interp,
- src_undef(),
- src_undef());
- }
- else {
- emit_op(c,
- WM_LINTERP,
- dst,
- 0, 0, 0,
- interp,
- deltas,
- src_undef());
- }
- break;
- default:
- emit_op(c,
- WM_PINTERP,
- dst,
- 0, 0, 0,
- interp,
- deltas,
- get_pixel_w(c));
- break;
- }
-
- c->fp_interp_emitted |= 1<<idx;
-}
-
-static void emit_ddx( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- unsigned idx = inst->SrcReg[0].Index;
- struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
- c->fp_deriv_emitted |= 1<<idx;
- emit_op(c,
- OPCODE_DDX,
- inst->DstReg,
- 0, 0, 0,
- interp,
- get_pixel_w(c),
- src_undef());
-}
-
-static void emit_ddy( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- unsigned idx = inst->SrcReg[0].Index;
- struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
- c->fp_deriv_emitted |= 1<<idx;
- emit_op(c,
- OPCODE_DDY,
- inst->DstReg,
- 0, 0, 0,
- interp,
- get_pixel_w(c),
- src_undef());
-}
-
-/***********************************************************************
- * Hacks to extend the program parameter and constant lists.
- */
-
-/* Add the fog parameters to the parameter list of the original
- * program, rather than creating a new list. Doesn't really do any
- * harm and it's not as if the parameter handling isn't a big hack
- * anyway.
- */
-static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
- int s0,
- int s1,
- int s2,
- int s3,
- int s4)
-{
- struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
- gl_state_index tokens[STATE_LENGTH];
- unsigned idx;
- tokens[0] = s0;
- tokens[1] = s1;
- tokens[2] = s2;
- tokens[3] = s3;
- tokens[4] = s4;
-
- for (idx = 0; idx < paramList->NumParameters; idx++) {
- if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
- memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
- return src_reg(PROGRAM_STATE_VAR, idx);
- }
-
- idx = _mesa_add_state_reference( paramList, tokens );
-
- /* Recalculate state dependency:
- */
- c->fp->param_state = paramList->StateFlags;
-
- return src_reg(PROGRAM_STATE_VAR, idx);
-}
-
-
-static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
- float s0,
- float s1,
- float s2,
- float s3)
-{
- struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
- float values[4];
- unsigned idx;
- unsigned swizzle;
-
- values[0] = s0;
- values[1] = s1;
- values[2] = s2;
- values[3] = s3;
-
- /* Have to search, otherwise multiple compilations will each grow
- * the parameter list.
- */
- for (idx = 0; idx < paramList->NumParameters; idx++) {
- if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
- memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
-
- /* XXX: this mimics the mesa bug which puts all constants and
- * parameters into the "PROGRAM_STATE_VAR" category:
- */
- return src_reg(PROGRAM_STATE_VAR, idx);
- }
-
- idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
- /* XXX what about swizzle? */
- return src_reg(PROGRAM_STATE_VAR, idx);
-}
-
-
-
-/***********************************************************************
- * Expand various instructions here to simpler forms.
- */
-static void precalc_dst( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- struct prog_src_register src0 = inst->SrcReg[0];
- struct prog_src_register src1 = inst->SrcReg[1];
- struct prog_dst_register dst = inst->DstReg;
-
- if (dst.WriteMask & WRITEMASK_Y) {
- /* dst.y = mul src0.y, src1.y
- */
- emit_op(c,
- OPCODE_MUL,
- dst_mask(dst, WRITEMASK_Y),
- inst->SaturateMode, 0, 0,
- src0,
- src1,
- src_undef());
- }
-
-
- if (dst.WriteMask & WRITEMASK_XZ) {
- unsigned z = GET_SWZ(src0.Swizzle, Z);
-
- /* dst.xz = swz src0.1zzz
- */
- emit_op(c,
- OPCODE_SWZ,
- dst_mask(dst, WRITEMASK_XZ),
- inst->SaturateMode, 0, 0,
- src_swizzle(src0, SWIZZLE_ONE, z, z, z),
- src_undef(),
- src_undef());
- }
- if (dst.WriteMask & WRITEMASK_W) {
- /* dst.w = mov src1.w
- */
- emit_op(c,
- OPCODE_MOV,
- dst_mask(dst, WRITEMASK_W),
- inst->SaturateMode, 0, 0,
- src1,
- src_undef(),
- src_undef());
- }
-}
-
-
-static void precalc_lit( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- struct prog_src_register src0 = inst->SrcReg[0];
- struct prog_dst_register dst = inst->DstReg;
-
- if (dst.WriteMask & WRITEMASK_XW) {
- /* dst.xw = swz src0.1111
- */
- emit_op(c,
- OPCODE_SWZ,
- dst_mask(dst, WRITEMASK_XW),
- 0, 0, 0,
- src_swizzle1(src0, SWIZZLE_ONE),
- src_undef(),
- src_undef());
- }
-
-
- if (dst.WriteMask & WRITEMASK_YZ) {
- emit_op(c,
- OPCODE_LIT,
- dst_mask(dst, WRITEMASK_YZ),
- inst->SaturateMode, 0, 0,
- src0,
- src_undef(),
- src_undef());
- }
-}
-
-static void precalc_tex( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- struct prog_src_register coord;
- struct prog_dst_register tmpcoord;
-
- if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
- struct prog_instruction *out;
- struct prog_dst_register tmp0 = get_temp(c);
- struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
- struct prog_dst_register tmp1 = get_temp(c);
- struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
- struct prog_src_register src0 = inst->SrcReg[0];
-
- tmpcoord = get_temp(c);
- coord = src_reg_from_dst(tmpcoord);
-
- out = emit_op(c, OPCODE_MOV,
- tmpcoord,
- 0, 0, 0,
- src0,
- src_undef(),
- src_undef());
- out->SrcReg[0].NegateBase = 0;
- out->SrcReg[0].Abs = 1;
-
- emit_op(c, OPCODE_MAX,
- tmp0,
- 0, 0, 0,
- src_swizzle1(coord, X),
- src_swizzle1(coord, Y),
- src_undef());
-
- emit_op(c, OPCODE_MAX,
- tmp1,
- 0, 0, 0,
- tmp0src,
- src_swizzle1(coord, Z),
- src_undef());
-
- emit_op(c, OPCODE_RCP,
- tmp0,
- 0, 0, 0,
- tmp1src,
- src_undef(),
- src_undef());
-
- emit_op(c, OPCODE_MUL,
- tmpcoord,
- 0, 0, 0,
- src0,
- tmp0src,
- src_undef());
-
- release_temp(c, tmp0);
- release_temp(c, tmp1);
- } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
- struct prog_src_register scale =
- search_or_add_param5( c,
- STATE_INTERNAL,
- STATE_TEXRECT_SCALE,
- inst->TexSrcUnit,
- 0,0 );
-
- tmpcoord = get_temp(c);
-
- /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
- */
- emit_op(c,
- OPCODE_MUL,
- tmpcoord,
- 0, 0, 0,
- inst->SrcReg[0],
- scale,
- src_undef());
-
- coord = src_reg_from_dst(tmpcoord);
- }
- else {
- coord = inst->SrcReg[0];
- }
-
- /* Need to emit YUV texture conversions by hand. Probably need to
- * do this here - the alternative is in brw_wm_emit.c, but the
- * conversion requires allocating a temporary variable which we
- * don't have the facility to do that late in the compilation.
- */
- if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) {
- emit_op(c,
- OPCODE_TEX,
- inst->DstReg,
- inst->SaturateMode,
- inst->TexSrcUnit,
- inst->TexSrcTarget,
- coord,
- src_undef(),
- src_undef());
- }
- else {
- /*
- CONST C0 = { -.5, -.0625, -.5, 1.164 }
- CONST C1 = { 1.596, -0.813, 2.018, -.391 }
- UYV = TEX ...
- UYV.xyz = ADD UYV, C0
- UYV.y = MUL UYV.y, C0.w
- RGB.xyz = MAD UYV.xxz, C1, UYV.y
- RGB.y = MAD UYV.z, C1.w, RGB.y
- */
- struct prog_dst_register dst = inst->DstReg;
- struct prog_src_register src0 = inst->SrcReg[0];
- struct prog_dst_register tmp = get_temp(c);
- struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
- struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
- struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
-
- /* tmp = TEX ...
- */
- emit_op(c,
- OPCODE_TEX,
- tmp,
- inst->SaturateMode,
- inst->TexSrcUnit,
- inst->TexSrcTarget,
- src0,
- src_undef(),
- src_undef());
-
- /* tmp.xyz = ADD TMP, C0
- */
- emit_op(c,
- OPCODE_ADD,
- dst_mask(tmp, WRITEMASK_XYZ),
- 0, 0, 0,
- tmpsrc,
- C0,
- src_undef());
-
- /* YUV.y = MUL YUV.y, C0.w
- */
- emit_op(c,
- OPCODE_MUL,
- dst_mask(tmp, WRITEMASK_Y),
- 0, 0, 0,
- tmpsrc,
- src_swizzle1(C0, W),
- src_undef());
-
- /* RGB.xyz = MAD YUV.xxz, C1, YUV.y
- */
- emit_op(c,
- OPCODE_MAD,
- dst_mask(dst, WRITEMASK_XYZ),
- 0, 0, 0,
- src_swizzle(tmpsrc, X,X,Z,Z),
- C1,
- src_swizzle1(tmpsrc, Y));
-
- /* RGB.y = MAD YUV.z, C1.w, RGB.y
- */
- emit_op(c,
- OPCODE_MAD,
- dst_mask(dst, WRITEMASK_Y),
- 0, 0, 0,
- src_swizzle1(tmpsrc, Z),
- src_swizzle1(C1, W),
- src_swizzle1(src_reg_from_dst(dst), Y));
-
- release_temp(c, tmp);
- }
-
- if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV)
- release_temp(c, tmpcoord);
-}
-
-
-static boolean projtex( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- struct prog_src_register src = inst->SrcReg[0];
-
- /* Only try to detect the simplest cases. Could detect (later)
- * cases where we are trying to emit code like RCP {1.0}, MUL x,
- * {1.0}, and so on.
- *
- * More complex cases than this typically only arise from
- * user-provided fragment programs anyway:
- */
- if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
- return 0; /* ut2004 gun rendering !?! */
- else if (src.File == PROGRAM_INPUT &&
- GET_SWZ(src.Swizzle, W) == W &&
- (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
- return 0;
- else
- return 1;
-}
-
-
-static void precalc_txp( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- struct prog_src_register src0 = inst->SrcReg[0];
-
- if (projtex(c, inst)) {
- struct prog_dst_register tmp = get_temp(c);
- struct prog_instruction tmp_inst;
-
- /* tmp0.w = RCP inst.arg[0][3]
- */
- emit_op(c,
- OPCODE_RCP,
- dst_mask(tmp, WRITEMASK_W),
- 0, 0, 0,
- src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
- src_undef(),
- src_undef());
-
- /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
- */
- emit_op(c,
- OPCODE_MUL,
- dst_mask(tmp, WRITEMASK_XYZ),
- 0, 0, 0,
- src0,
- src_swizzle1(src_reg_from_dst(tmp), W),
- src_undef());
-
- /* dst = precalc(TEX tmp0)
- */
- tmp_inst = *inst;
- tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
- precalc_tex(c, &tmp_inst);
-
- release_temp(c, tmp);
- }
- else
- {
- /* dst = precalc(TEX src0)
- */
- precalc_tex(c, inst);
- }
-}
-
-
-
-
-
-/***********************************************************************
- * Add instructions to perform fog blending
- */
-
-static void fog_blend( struct brw_wm_compile *c,
- struct prog_src_register fog_factor )
-{
- struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
- struct prog_src_register fogcolor = search_or_add_param5( c, STATE_FOG_COLOR, 0,0,0,0 );
-
- /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
-
- emit_op(c,
- OPCODE_LRP,
- dst_mask(outcolor, WRITEMASK_XYZ),
- 0, 0, 0,
- fog_factor,
- src_reg_from_dst(outcolor),
- fogcolor);
-}
-
-
-
-/* This one is simple - just take the interpolated fog coordinate and
- * use it as the fog blend factor.
- */
-static void fog_interpolated( struct brw_wm_compile *c )
-{
- struct prog_src_register fogc = src_reg(PROGRAM_INPUT, FRAG_ATTRIB_FOGC);
-
- if (!(c->fp_interp_emitted & (1<<FRAG_ATTRIB_FOGC)))
- emit_interp(c, FRAG_ATTRIB_FOGC);
-
- fog_blend( c, src_swizzle1(fogc, GET_SWZ(fogc.Swizzle,X)));
-}
-
-static void emit_fog( struct brw_wm_compile *c )
-{
- if (!c->fp->program.FogOption)
- return;
-
- if (1)
- fog_interpolated( c );
- else {
- /* TODO: per-pixel fog */
- assert(0);
- }
-}
-
-static void emit_fb_write( struct brw_wm_compile *c )
-{
- struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
- struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
- struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
-
- emit_op(c,
- WM_FB_WRITE,
- dst_mask(dst_undef(),0),
- 0, 0, 0,
- outcolor,
- payload_r0_depth,
- outdepth);
-}
-
-
-
-
-/***********************************************************************
- * Emit INTERP instructions ahead of first use of each attrib.
- */
-
-static void validate_src_regs( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- unsigned nr_args = brw_wm_nr_args( inst->Opcode );
- unsigned i;
-
- for (i = 0; i < nr_args; i++) {
- if (inst->SrcReg[i].File == PROGRAM_INPUT) {
- unsigned idx = inst->SrcReg[i].Index;
- if (!(c->fp_interp_emitted & (1<<idx))) {
- emit_interp(c, idx);
- }
- }
- }
-}
-
-
-
-static void print_insns( const struct prog_instruction *insn,
- unsigned nr )
-{
- unsigned i;
- for (i = 0; i < nr; i++, insn++) {
- _mesa_printf("%3d: ", i);
- if (insn->Opcode < MAX_OPCODE)
- _mesa_print_instruction(insn);
- else if (insn->Opcode < MAX_WM_OPCODE) {
- unsigned idx = insn->Opcode - MAX_OPCODE;
-
- _mesa_print_alu_instruction(insn,
- wm_opcode_strings[idx],
- 3);
- }
- else
- _mesa_printf("UNKNOWN\n");
-
- }
-}
-void brw_wm_pass_fp( struct brw_wm_compile *c )
-{
- struct brw_fragment_program *fp = c->fp;
- unsigned insn;
- if (BRW_DEBUG & DEBUG_WM) {
- _mesa_printf("\n\n\npre-fp:\n");
- _mesa_print_program(&fp->program.Base);
- _mesa_printf("\n");
- }
-
- c->pixel_xy = src_undef();
- c->delta_xy = src_undef();
- c->pixel_w = src_undef();
- c->nr_fp_insns = 0;
-
- /* Emit preamble instructions:
- */
-
-
- for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
- const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
- struct prog_instruction *out;
-
- /* Check for INPUT values, emit INTERP instructions where
- * necessary:
- */
- validate_src_regs(c, inst);
-
-
- switch (inst->Opcode) {
- case OPCODE_SWZ:
- out = emit_insn(c, inst);
- out->Opcode = OPCODE_MOV;
- break;
-
- case OPCODE_ABS:
- out = emit_insn(c, inst);
- out->Opcode = OPCODE_MOV;
- out->SrcReg[0].NegateBase = 0;
- out->SrcReg[0].Abs = 1;
- break;
-
- case OPCODE_SUB:
- out = emit_insn(c, inst);
- out->Opcode = OPCODE_ADD;
- out->SrcReg[1].NegateBase ^= 0xf;
- break;
-
- case OPCODE_SCS:
- out = emit_insn(c, inst);
- /* This should probably be done in the parser.
- */
- out->DstReg.WriteMask &= WRITEMASK_XY;
- break;
-
- case OPCODE_DST:
- precalc_dst(c, inst);
- break;
-
- case OPCODE_LIT:
- precalc_lit(c, inst);
- break;
-
- case OPCODE_TXP:
- precalc_txp(c, inst);
- break;
-
- case OPCODE_XPD:
- out = emit_insn(c, inst);
- /* This should probably be done in the parser.
- */
- out->DstReg.WriteMask &= WRITEMASK_XYZ;
- break;
-
- case OPCODE_KIL:
- out = emit_insn(c, inst);
- /* This should probably be done in the parser.
- */
- out->DstReg.WriteMask = 0;
- break;
- case OPCODE_DDX:
- emit_ddx(c, inst);
- break;
- case OPCODE_DDY:
- emit_ddy(c, inst);
- break;
- case OPCODE_END:
- emit_fog(c);
- emit_fb_write(c);
- break;
- case OPCODE_PRINT:
- break;
-
- default:
- emit_insn(c, inst);
- break;
- }
- }
-
- if (BRW_DEBUG & DEBUG_WM) {
- _mesa_printf("\n\n\npass_fp:\n");
- print_insns( c->prog_instructions, c->nr_fp_insns );
- _mesa_printf("\n");
- }
-}
-#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_glsl.c b/src/mesa/pipe/i965simple/brw_wm_glsl.c
index 90e73a605a..d6dfaed826 100644
--- a/src/mesa/pipe/i965simple/brw_wm_glsl.c
+++ b/src/mesa/pipe/i965simple/brw_wm_glsl.c
@@ -2,753 +2,437 @@
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
-#if 0
-
-/* Only guess, need a flag in gl_fragment_program later */
-boolean brw_wm_is_glsl(struct gl_fragment_program *fp)
-{
- int i;
- for (i = 0; i < fp->Base.NumInstructions; i++) {
- struct prog_instruction *inst = &fp->Base.Instructions[i];
- switch (inst->Opcode) {
- case OPCODE_IF:
- case OPCODE_INT:
- case OPCODE_ENDIF:
- case OPCODE_CAL:
- case OPCODE_BRK:
- case OPCODE_RET:
- case OPCODE_DDX:
- case OPCODE_DDY:
- case OPCODE_BGNLOOP:
- return TRUE;
- default:
- break;
- }
- }
- return FALSE;
-}
-static void set_reg(struct brw_wm_compile *c, int file, int index,
- int component, struct brw_reg reg)
-{
- c->wm_regs[file][index][component].reg = reg;
- c->wm_regs[file][index][component].inited = TRUE;
-}
-static int get_scalar_dst_index(struct prog_instruction *inst)
+static int get_scalar_dst_index(struct tgsi_full_instruction *inst)
{
- int i;
- for (i = 0; i < 4; i++)
- if (inst->DstReg.WriteMask & (1<<i))
- break;
- return i;
+ struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister; /* only the first destination is examined */
+ int i;
+ for (i = 0; i < 4; i++)
+ if (dst.WriteMask & (1<<i))
+ break;
+ return i; /* lowest enabled channel (0..3), or 4 if the writemask is empty */
}
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
- struct brw_reg reg;
- reg = brw_vec8_grf(c->tmp_index--, 0);
- return reg;
+ c->tmp_index++; /* hand out the next internal temporary, numbered relative to tmp_start */
+ c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index); /* tmp_index is relative; compare against the absolute GRF number returned below */
+ return brw_vec8_grf(c->tmp_start + c->tmp_index, 0);
}
static void release_tmps(struct brw_wm_compile *c)
{
- c->tmp_index = 127;
+ c->tmp_index = 0; /* restart temporary numbering; alloc_tmp() pre-increments, so the next temporary is tmp_start + 1 */
}
+
static struct brw_reg
-get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, unsigned neg, unsigned abs)
+get_reg(struct brw_wm_compile *c, int file, int index, int component ) /* map a TGSI (file, index, component) triple to a brw_reg */
{
- struct brw_reg reg;
- switch (file) {
- case PROGRAM_STATE_VAR:
- case PROGRAM_CONSTANT:
- case PROGRAM_UNIFORM:
- file = PROGRAM_STATE_VAR;
- break;
- case PROGRAM_UNDEFINED:
- return brw_null_reg();
- default:
- break;
- }
-
- if(c->wm_regs[file][index][component].inited)
- reg = c->wm_regs[file][index][component].reg;
- else
- reg = brw_vec8_grf(c->reg_index, 0);
-
- if(!c->wm_regs[file][index][component].inited) {
- set_reg(c, file, index, component, reg);
- c->reg_index++;
- }
-
- if (neg & (1<< component)) {
- reg = negate(reg);
- }
- if (abs)
- reg = brw_abs(reg);
- return reg;
+ switch (file) {
+ case TGSI_FILE_NULL:
+ return brw_null_reg();
+
+ case TGSI_FILE_SAMPLER:
+ /* Should never get here:
+ */
+ assert (0);
+ return brw_null_reg(); /* fallback when asserts are compiled out */
+
+ case TGSI_FILE_IMMEDIATE:
+ /* These need a different path:
+ */
+ assert(0);
+ return brw_null_reg(); /* fallback when asserts are compiled out */
+
+
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_ADDRESS:
+ return c->wm_regs[file][index][component]; /* plain table lookup; index and component are not range-checked here */
+
+ default:
+ assert(0);
+ return brw_null_reg(); /* unknown register file */
+ }
}
-static void prealloc_reg(struct brw_wm_compile *c)
-{
- int i, j;
- struct brw_reg reg;
- int nr_interp_regs = 0;
- unsigned inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
-
- for (i = 0; i < 4; i++) {
- reg = (i < c->key.nr_depth_regs)
- ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
- set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
- }
- c->reg_index += 2*c->key.nr_depth_regs;
- {
- int nr_params = c->fp->program.Base.Parameters->NumParameters;
- struct gl_program_parameter_list *plist =
- c->fp->program.Base.Parameters;
- int index = 0;
- c->prog_data.nr_params = 4*nr_params;
- for (i = 0; i < nr_params; i++) {
- for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(c->reg_index + index/8,
- index%8);
- c->prog_data.param[index] =
- &plist->ParameterValues[i][j];
- set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
- }
- }
- c->nr_creg = 2*((4*nr_params+15)/16);
- c->reg_index += c->nr_creg;
- }
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (inputs & (1<<i)) {
- nr_interp_regs++;
- reg = brw_vec8_grf(c->reg_index, 0);
- for (j = 0; j < 4; j++)
- set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
- c->reg_index += 2;
-
- }
- }
- c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
- c->prog_data.urb_read_length = nr_interp_regs * 2;
- c->prog_data.curb_read_length = c->nr_creg;
- c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index++;
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index += 2;
-}
static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
- struct prog_instruction *inst, int component, int nr)
-{
- return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
- 0, 0);
-}
-
-static struct brw_reg get_src_reg(struct brw_wm_compile *c,
- struct prog_src_register *src, int index, int nr)
+ struct tgsi_full_instruction *inst,
+ int component) /* Returns get_reg() for channel 'component' of the instruction's first destination operand. */
{
- int component = GET_SWZ(src->Swizzle, index);
- return get_reg(c, src->File, src->Index, component, nr,
- src->NegateBase, src->Abs);
+ return get_reg(c,
+ inst->FullDstRegisters[0].DstRegister.File,
+ inst->FullDstRegisters[0].DstRegister.Index,
+ component); /* only FullDstRegisters[0] is consulted; the write mask is not checked here */
}
-static void emit_abs( struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (inst->DstReg.WriteMask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i, 1);
- src = get_src_reg(c, &inst->SrcReg[0], i, 1);
- brw_MOV(p, dst, brw_abs(src));
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_int( struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i, 1) ;
- src = get_src_reg(c, &inst->SrcReg[0], i, 1);
- brw_RNDD(p, dst, src);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_mov( struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i, 1);
- src = get_src_reg(c, &inst->SrcReg[0], i, 1);
- brw_MOV(p, dst, src);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_pixel_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_reg r1 = brw_vec1_grf(1, 0);
- struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
-
- struct brw_reg dst0, dst1;
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
-
- dst0 = get_dst_reg(c, inst, 0, 1);
- dst1 = get_dst_reg(c, inst, 1, 1);
- /* Calculate pixel centers by adding 1 or 0 to each of the
- * micro-tile coordinates passed in r1.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
- stride(suboffset(r1_uw, 4), 2, 4, 0),
- brw_imm_v(0x10101010));
- }
-
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
- stride(suboffset(r1_uw, 5), 2, 4, 0),
- brw_imm_v(0x11001100));
- }
-
-}
-
-static void emit_delta_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+static int get_swz( struct tgsi_src_register src, int index ) /* Returns the swizzle selector that src stores for channel (index & 3): 0 -> SwizzleX ... 3 -> SwizzleW. */
{
- struct brw_reg r1 = brw_vec1_grf(1, 0);
- struct brw_reg dst0, dst1, src0, src1;
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
-
- dst0 = get_dst_reg(c, inst, 0, 1);
- dst1 = get_dst_reg(c, inst, 1, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1);
- /* Calc delta X,Y by subtracting origin in r1 from the pixel
- * centers.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- dst0,
- retype(src0, BRW_REGISTER_TYPE_UW),
- negate(r1));
- }
-
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- dst1,
- retype(src1, BRW_REGISTER_TYPE_UW),
- negate(suboffset(r1,1)));
-
- }
-
+ switch (index & 3) { /* only the low two bits of index are used */
+ case 0: return src.SwizzleX;
+ case 1: return src.SwizzleY;
+ case 2: return src.SwizzleZ;
+ case 3: return src.SwizzleW;
+ default: return 0; /* not reachable: (index & 3) is always 0..3 */
+ }
}
-
-static void fire_fb_write( struct brw_wm_compile *c,
- unsigned base_reg,
- unsigned nr )
+static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) /* Returns the extended-swizzle selector that src stores for channel (index & 3): 0 -> ExtSwizzleX ... 3 -> ExtSwizzleW. */
{
- struct brw_compile *p = &c->func;
-
- /* Pass through control information:
- */
- /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
- {
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
- brw_MOV(p,
- brw_message_reg(base_reg + 1),
- brw_vec8_grf(1, 0));
- brw_pop_insn_state(p);
- }
- /* Send framebuffer write message: */
- brw_fb_WRITE(p,
- retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
- base_reg,
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
- 0, /* render surface always 0 */
- nr,
- 0,
- 1);
+ switch (index & 3) { /* only the low two bits of index are used */
+ case 0: return src.ExtSwizzleX;
+ case 1: return src.ExtSwizzleY;
+ case 2: return src.ExtSwizzleZ;
+ case 3: return src.ExtSwizzleW;
+ default: return 0; /* not reachable: (index & 3) is always 0..3 */
+ }
}
-static void emit_fb_write(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- int nr = 2;
- int channel;
- struct brw_reg src0;//, src1, src2, dst;
-
- /* Reserve a space for AA - may not be needed:
- */
- if (c->key.aa_dest_stencil_reg)
- nr += 1;
- {
- brw_push_insn_state(p);
- for (channel = 0; channel < 4; channel++) {
- src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1);
- /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
- /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
- brw_MOV(p, brw_message_reg(nr + channel), src0);
- }
- /* skip over the regs populated above: */
- nr += 8;
- brw_pop_insn_state(p);
- }
- fire_fb_write(c, 0, nr);
-}
-
-static void emit_pixel_w( struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- if (mask & WRITEMASK_W) {
- struct brw_reg dst, src0, delta0, delta1;
- struct brw_reg interp3;
-
- dst = get_dst_reg(c, inst, 3, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
- delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
-
- interp3 = brw_vec1_grf(src0.nr+1, 4);
- /* Calc 1/w - just linterp wpos[3] optimized by putting the
- * result straight into a message reg.
- */
- brw_LINE(p, brw_null_reg(), interp3, delta0);
- brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
-
- /* Calc w */
- brw_math_16( p, dst,
- BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
- BRW_MATH_PRECISION_FULL);
- }
-}
-
-static void emit_linterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+/* Resolve one channel of a TGSI source operand to a brw_reg.
+ *
+ * c:     compile context, passed through to get_reg().
+ * src:   the full TGSI source operand.
+ * index: channel being read (0..3) before any swizzling.
+ *
+ * The channel goes through the base swizzle and then the extended
+ * swizzle.  The base negate is XORed with the per-channel extended
+ * negate, and the ExtMod absolute flag is honoured.  Extended swizzle
+ * ZERO/ONE yield float immediates.  Unsupported modifiers (ExtDivide,
+ * Indirect, Dimension, Complement, Bias, Scale2X, ExtMod.Negate) are
+ * rejected with assert().
+ *
+ * Returns the resolved register; the null register if the extended
+ * swizzle selector is not one of the known values.
+ */
+static struct brw_reg get_src_reg(struct brw_wm_compile *c,
+ struct tgsi_full_src_register *src,
+ int index)
{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst, delta0, delta1;
- struct brw_reg src0;
-
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
- delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- unsigned nr = src0.nr;
- int i;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- brw_LINE(p, brw_null_reg(), interp[i], delta0);
- brw_MAC(p, dst, suboffset(interp[i],1), delta1);
- }
- }
+ struct brw_reg reg;
+ int component = index;
+ int neg = 0;
+ int abs = 0;
+
+ if (src->SrcRegister.Negate)
+ neg = 1;
+
+ component = get_swz(src->SrcRegister, component);
+
+ /* Yes, there are multiple negates:
+ */
+ switch (component & 3) {
+ case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break;
+ case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break;
+ case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break;
+ case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break;
+ }
+
+ /* And multiple swizzles, fun isn't it:
+ */
+ component = get_ext_swz(src->SrcRegisterExtSwz, component);
+
+ /* Can't handle this, don't know if we need to:
+ */
+ assert(src->SrcRegisterExtSwz.ExtDivide == 0);
+
+ /* Not handling indirect lookups yet:
+ */
+ assert(src->SrcRegister.Indirect == 0);
+
+ /* Don't know what dimension means:
+ */
+ assert(src->SrcRegister.Dimension == 0);
+
+ /* Will never handle any of this stuff:
+ */
+ assert(src->SrcRegisterExtMod.Complement == 0);
+ assert(src->SrcRegisterExtMod.Bias == 0);
+ assert(src->SrcRegisterExtMod.Scale2X == 0);
+
+ if (src->SrcRegisterExtMod.Absolute)
+ abs = 1;
+
+ /* Another negate! This is a post-absolute negate, which we
+ * can't do. Need to clean the crap out of tgsi somehow.
+ */
+ assert(src->SrcRegisterExtMod.Negate == 0);
+
+ switch( component ) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ reg = get_reg(c,
+ src->SrcRegister.File,
+ src->SrcRegister.Index,
+ component );
+
+ /* The negates collected above apply before the absolute value,
+ * so abs cancels them (the ONE case below makes the same
+ * choice).  Never set both flags on the register: the result
+ * would depend on how brw_abs() and the hardware combine them.
+ */
+ if (abs)
+ reg = brw_abs(reg);
+ else if (neg)
+ reg = negate(reg);
+
+ break;
+
+ /* XXX: this won't really work in the general case, but we know
+ * that the extended swizzle is only allowed in the SWZ
+ * instruction (right??), in which case using an immediate
+ * directly will work.
+ */
+ case TGSI_EXTSWIZZLE_ZERO:
+ reg = brw_imm_f(0);
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ if (neg && !abs)
+ reg = brw_imm_f(-1.0);
+ else
+ reg = brw_imm_f(1.0);
+ break;
+
+ default:
+ assert(0);
+ /* Keep the return value defined when asserts are compiled out. */
+ reg = brw_null_reg();
+ break;
+ }
+
+
+ return reg;
}
-static void emit_cinterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+static void emit_abs( struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst) /* Emits one MOV of brw_abs(source 0) per destination channel enabled in the write mask. */
{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
-
- struct brw_reg interp[4];
- struct brw_reg dst, src0;
-
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- unsigned nr = src0.nr;
- int i;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- brw_MOV(p, dst, suboffset(interp[i],3));
- }
- }
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+
+ int i;
+ struct brw_compile *p = &c->func;
+ brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); /* saturate stays enabled for every MOV in the loop */
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg src, dst;
+ dst = get_dst_reg(c, inst, i);
+ src = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ brw_MOV(p, dst, brw_abs(src)); /* NOTE: brw_abs() is applied on top of whatever modifiers get_src_reg() already put on src */
+ }
+ }
+ brw_set_saturate(p, 0); /* restore the default so later instructions are not saturated */
}
-static void emit_pinterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
-
- struct brw_reg interp[4];
- struct brw_reg dst, delta0, delta1;
- struct brw_reg src0, w;
-
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
- delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
- unsigned nr = src0.nr;
- int i;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- brw_LINE(p, brw_null_reg(), interp[i], delta0);
- brw_MAC(p, dst, suboffset(interp[i],1),
- delta1);
- brw_MUL(p, dst, dst, w);
- }
- }
-}
static void emit_xpd(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct tgsi_full_instruction *inst) /* Emits a MUL/MAC pair per enabled destination channel: -src0[i2]*src1[i1] followed by src0[i1]*src1[i2] (a cross product, assuming MAC adds onto the preceding MUL -- confirm). */
{
- int i;
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- for (i = 0; i < 4; i++) {
- unsigned i2 = (i+2)%3;
- unsigned i1 = (i+1)%3;
- if (mask & (1<<i)) {
- struct brw_reg src0, src1, dst;
- dst = get_dst_reg(c, inst, i, 1);
- src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
- src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
- brw_MUL(p, brw_null_reg(), src0, src1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- brw_MAC(p, dst, src0, src1);
- brw_set_saturate(p, 0);
- }
- }
- brw_set_saturate(p, 0);
+ int i;
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ for (i = 0; i < 4; i++) { /* NOTE(review): i == 3 (W) is processed too, with i1 == 1 and i2 == 2 -- confirm that is intended */
+ unsigned i2 = (i+2)%3;
+ unsigned i1 = (i+1)%3;
+ if (mask & (1<<i)) {
+ struct brw_reg src0, src1, dst;
+ dst = get_dst_reg(c, inst, i);
+ src0 = negate(get_src_reg(c, &inst->FullSrcRegisters[0], i2)); /* negated term */
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1);
+ brw_MUL(p, brw_null_reg(), src0, src1); /* destination is the null register; only the MAC below writes dst */
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2);
+ brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); /* saturate only the instruction that writes dst */
+ brw_MAC(p, dst, src0, src1);
+ brw_set_saturate(p, 0);
+ }
+ }
+ brw_set_saturate(p, 0); /* saturate is already 0 here after any processed channel */
}
static void emit_dp3(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct tgsi_full_instruction *inst) /* Emits MUL, MAC, MAC over channels 0..2 of sources 0 and 1; only the last MAC writes the destination (a 3-component dot product, assuming MAC adds onto the preceding results -- confirm). */
{
- struct brw_reg src0[3], src1[3], dst;
- int i;
- struct brw_compile *p = &c->func;
- for (i = 0; i < 3; i++) {
- src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
- }
-
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
- brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
- brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MAC(p, dst, src0[2], src1[2]);
- brw_set_saturate(p, 0);
+ struct brw_reg src0[3], src1[3], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ for (i = 0; i < 3; i++) { /* resolve all operands before emitting anything */
+ src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ }
+
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); /* a single destination channel is written, chosen by get_scalar_dst_index() */
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); /* saturate only the instruction that writes dst */
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, 0);
}
static void emit_dp4(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct tgsi_full_instruction *inst) /* Emits MUL, MAC, MAC, MAC over channels 0..3 of sources 0 and 1; only the last MAC writes the destination (a 4-component dot product, assuming MAC adds onto the preceding results -- confirm). */
{
- struct brw_reg src0[4], src1[4], dst;
- int i;
- struct brw_compile *p = &c->func;
- for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
- }
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
- brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
- brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
- brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MAC(p, dst, src0[3], src1[3]);
- brw_set_saturate(p, 0);
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ for (i = 0; i < 4; i++) { /* resolve all operands before emitting anything */
+ src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ }
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); /* a single destination channel is written, chosen by get_scalar_dst_index() */
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); /* saturate only the instruction that writes dst */
+ brw_MAC(p, dst, src0[3], src1[3]);
+ brw_set_saturate(p, 0);
}
+/* DPH (homogeneous dot product):
+ *
+ *    dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w
+ *
+ * The xyz products are emitted as a MUL/MAC chain whose last MAC writes
+ * dst; src1.w is then added onto dst.  Only the single destination
+ * channel chosen by get_scalar_dst_index() is written, and saturation
+ * is applied to the final ADD only.
+ */
static void emit_dph(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct tgsi_full_instruction *inst)
{
- struct brw_reg src0[4], src1[4], dst;
- int i;
- struct brw_compile *p = &c->func;
- for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
- }
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
- brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
- brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
- brw_MAC(p, dst, src0[2], src1[2]);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_ADD(p, dst, src0[3], src1[3]);
- brw_set_saturate(p, 0);
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ for (i = 0; i < 4; i++) {
+ src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ }
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ /* Add src1.w to the xyz dot product already in dst.  src0.w takes
+ * no part in DPH; adding src0.w + src1.w here would throw the dot
+ * product away.
+ */
+ brw_ADD(p, dst, dst, src1[3]);
+ brw_set_saturate(p, 0);
}
static void emit_math1(struct brw_wm_compile *c,
- struct prog_instruction *inst, unsigned func)
+ struct tgsi_full_instruction *inst, unsigned func) /* Emits a one-operand brw_math() call for 'func' on channel 0 of source 0, writing the destination channel chosen by get_scalar_dst_index(). */
{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
- brw_MOV(p, brw_message_reg(2), src0);
- brw_math(p,
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); /* only channel 0 of the source is read */
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ brw_MOV(p, brw_message_reg(2), src0); /* operand is staged in message register 2; presumably the '2' passed to brw_math() below names it -- confirm */
+ brw_math(p,
dst,
func,
- (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ ((inst->Instruction.Saturate != TGSI_SAT_NONE) /* saturation is requested through the math call, not brw_set_saturate() */
+ ? BRW_MATH_SATURATE_SATURATE
+ : BRW_MATH_SATURATE_NONE),
2,
brw_null_reg(),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
-static void emit_rcp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
-}
-static void emit_rsq(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+static void emit_alu2(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst,
+ unsigned opcode) /* Emits brw_alu2(opcode, dst[i], src0[i], src1[i]) for every destination channel enabled in the write mask. */
{
- emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, dst;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); /* stays in effect for every instruction emitted by the loop */
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ brw_alu2(p, opcode, dst, src0, src1);
+ }
+ }
+ brw_set_saturate(p, 0); /* restore the default so later instructions are not saturated */
}
-static void emit_sin(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
-}
-static void emit_cos(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+/* Emit a one-source ALU instruction per enabled destination channel.
+ *
+ * c:      compile context.
+ * inst:   TGSI instruction supplying the write mask, the saturate mode,
+ *         the first destination and the first source operand.
+ * opcode: hardware opcode handed to brw_alu1().
+ *
+ * Saturation is switched on for the whole loop when the instruction
+ * asks for it and always switched off again afterwards, the same way
+ * emit_alu2() does.
+ */
+static void emit_alu1(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst,
+ unsigned opcode)
{
- emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ brw_alu1(p, opcode, dst, src0);
+ }
+ }
+ /* Unconditional reset, consistent with the other emitters. */
+ brw_set_saturate(p, 0);
}
-static void emit_ex2(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
-}
-
-static void emit_lg2(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
-}
-
-static void emit_add(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- unsigned mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- brw_ADD(p, dst, src0, src1);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_sub(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- unsigned mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- brw_ADD(p, dst, src0, negate(src1));
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_mul(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- unsigned mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- brw_MUL(p, dst, src0, src1);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_frc(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
- unsigned mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- brw_FRC(p, dst, src0);
- }
- }
- if (inst->SaturateMode != SATURATE_OFF)
- brw_set_saturate(p, 0);
-}
-
-static void emit_flr(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
- unsigned mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- brw_RNDD(p, dst, src0);
- }
- }
- brw_set_saturate(p, 0);
-}
static void emit_max(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct tgsi_full_instruction *inst) /* Per enabled channel: MOV dst, src0; CMP src0 < src1; then a predicated MOV dst, src1 -- so dst ends up holding src1 wherever src0 < src1. */
{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- struct brw_reg src0, src1, dst;
- int i;
- brw_push_insn_state(p);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MOV(p, dst, src0);
- brw_set_saturate(p, 0);
-
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst, src1);
- brw_set_saturate(p, 0);
- brw_set_predicate_control_flag_value(p, 0xff);
- }
- }
- brw_pop_insn_state(p);
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg src0, src1, dst;
+ int i;
+ brw_push_insn_state(p); /* predicate/saturate changes below are undone by the matching pop */
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_MOV(p, dst, src0); /* NOTE(review): if dst is the same register as src1 this overwrites src1 before the CMP reads it -- confirm callers cannot alias */
+ brw_set_saturate(p, 0); /* the compare itself is emitted without saturation */
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); /* the next MOV is predicated on the compare result */
+ brw_MOV(p, dst, src1);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+ brw_pop_insn_state(p);
}
static void emit_min(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct tgsi_full_instruction *inst) /* Per enabled channel: MOV dst, src0; CMP src1 < src0; then a predicated MOV dst, src1 -- so dst ends up holding src1 wherever src1 < src0. */
{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- struct brw_reg src0, src1, dst;
- int i;
- brw_push_insn_state(p);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MOV(p, dst, src0);
- brw_set_saturate(p, 0);
-
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst, src1);
- brw_set_saturate(p, 0);
- brw_set_predicate_control_flag_value(p, 0xff);
- }
- }
- brw_pop_insn_state(p);
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg src0, src1, dst;
+ int i;
+ brw_push_insn_state(p); /* predicate/saturate changes below are undone by the matching pop */
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_MOV(p, dst, src0); /* NOTE(review): if dst is the same register as src1 this overwrites src1 before the CMP reads it -- confirm callers cannot alias */
+ brw_set_saturate(p, 0); /* the compare itself is emitted without saturation */
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); /* operands swapped relative to emit_max */
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); /* the next MOV is predicated on the compare result */
+ brw_MOV(p, dst, src1);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+ brw_pop_insn_state(p);
}
static void emit_pow(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct tgsi_full_instruction *inst)
{
- struct brw_compile *p = &c->func;
- struct brw_reg dst, src0, src1;
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst, src0, src1;
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0);
- brw_MOV(p, brw_message_reg(2), src0);
- brw_MOV(p, brw_message_reg(3), src1);
+ brw_MOV(p, brw_message_reg(2), src0);
+ brw_MOV(p, brw_message_reg(3), src1);
- brw_math(p,
+ brw_math(p,
dst,
BRW_MATH_FUNCTION_POW,
- (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ (inst->Instruction.Saturate != TGSI_SAT_NONE
+ ? BRW_MATH_SATURATE_SATURATE
+ : BRW_MATH_SATURATE_NONE),
2,
brw_null_reg(),
BRW_MATH_DATA_VECTOR,
@@ -756,601 +440,636 @@ static void emit_pow(struct brw_wm_compile *c,
}
+/* LRP: for each enabled channel,
+ *
+ *    dst = src0 * src1 + (1 - src0) * src2
+ *
+ * dst is used as scratch for (1 - src0) before all operands have been
+ * consumed, so any operand that lives in the same register as dst is
+ * copied to a temporary first.  That includes src0: it is read again by
+ * the final MAC after dst has been overwritten.  Temporaries are
+ * released after every channel.
+ */
static void emit_lrp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct tgsi_full_instruction *inst)
{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
- int i;
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
-
- if (src1.nr == dst.nr) {
- tmp1 = alloc_tmp(c);
- brw_MOV(p, tmp1, src1);
- } else
- tmp1 = src1;
-
- src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
- if (src2.nr == dst.nr) {
- tmp2 = alloc_tmp(c);
- brw_MOV(p, tmp2, src2);
- } else
- tmp2 = src2;
-
- brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
- brw_MUL(p, brw_null_reg(), dst, tmp2);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MAC(p, dst, src0, tmp1);
- brw_set_saturate(p, 0);
- }
- release_tmps(c);
- }
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg dst, tmp0, tmp1, tmp2, src0, src1, src2;
+ int i;
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+
+ /* src0 is needed by the MAC after dst has been written. */
+ if (src0.nr == dst.nr) {
+ tmp0 = alloc_tmp(c);
+ brw_MOV(p, tmp0, src0);
+ } else
+ tmp0 = src0;
+
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+
+ if (src1.nr == dst.nr) {
+ tmp1 = alloc_tmp(c);
+ brw_MOV(p, tmp1, src1);
+ } else
+ tmp1 = src1;
+
+ src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i);
+ if (src2.nr == dst.nr) {
+ tmp2 = alloc_tmp(c);
+ brw_MOV(p, tmp2, src2);
+ } else
+ tmp2 = src2;
+
+ /* dst = 1 - src0, then (1 - src0) * src2 + src0 * src1. */
+ brw_ADD(p, dst, negate(tmp0), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst, tmp2);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_MAC(p, dst, tmp0, tmp1);
+ brw_set_saturate(p, 0);
+ }
+ release_tmps(c);
+ }
}
static void emit_kil(struct brw_wm_compile *c)
{
- struct brw_compile *p = &c->func;
- struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
- brw_AND(p, depth, c->emit_mask_reg, depth);
- brw_pop_insn_state(p);
+ struct brw_compile *p = &c->func; /* emit_kil: ANDs the inverse of mask register 1 into the UW view of g0.0, with mask control disabled. */
+ struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); /* named 'depth' but addresses GRF 0, element 0 as unsigned words -- presumably the dispatch/pixel mask; confirm */
+ brw_push_insn_state(p); /* the mask-control change below is undone by the matching pop */
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, depth, c->emit_mask_reg, depth); /* clears in g0.0 every bit that is set in mask register 1 */
+ brw_pop_insn_state(p);
}
+/* MAD: for each enabled channel,
+ *
+ *    dst = src0 * src1 + src2
+ *
+ * The product is written to dst before src2 is read, so when src2 lives
+ * in the same register as dst (e.g. an accumulate-in-place MAD) it is
+ * copied to a temporary first; otherwise the ADD would see the product
+ * instead of the addend.  Saturation applies to the final ADD only.
+ * Temporaries are released after every channel, as in emit_lrp().
+ */
static void emit_mad(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct tgsi_full_instruction *inst)
{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- struct brw_reg dst, src0, src1, src2;
- int i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
- brw_MUL(p, dst, src0, src1);
-
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_ADD(p, dst, dst, src2);
- brw_set_saturate(p, 0);
- }
- }
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg dst, src0, src1, src2, addend;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i);
+
+ /* Preserve the addend if the MUL below is about to overwrite it. */
+ if (src2.nr == dst.nr) {
+ addend = alloc_tmp(c);
+ brw_MOV(p, addend, src2);
+ } else
+ addend = src2;
+
+ brw_MUL(p, dst, src0, src1);
+
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_ADD(p, dst, dst, addend);
+ brw_set_saturate(p, 0);
+ }
+ release_tmps(c);
+ }
}
static void emit_sop(struct brw_wm_compile *c,
- struct prog_instruction *inst, unsigned cond)
+ struct tgsi_full_instruction *inst, unsigned cond)
{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- struct brw_reg dst, src0, src1;
- int i;
-
- brw_push_insn_state(p);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- brw_CMP(p, brw_null_reg(), cond, src0, src1);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_MOV(p, dst, brw_imm_f(0.0));
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst, brw_imm_f(1.0));
- }
- }
- brw_pop_insn_state(p);
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg dst, src0, src1;
+ int i;
+
+ brw_push_insn_state(p);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ brw_CMP(p, brw_null_reg(), cond, src0, src1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst, brw_imm_f(0.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, brw_imm_f(1.0));
+ }
+ }
+ brw_pop_insn_state(p);
}
-static void emit_slt(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_L);
-}
-static void emit_sle(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+static void emit_ddx(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
{
- emit_sop(c, inst, BRW_CONDITIONAL_LE);
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg interp[4];
+ struct brw_reg dst;
+ struct brw_reg src0, w;
+ unsigned nr, i;
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+ w = get_src_reg(c, &inst->FullSrcRegisters[1], 3);
+ nr = src0.nr;
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+ brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, interp[i]);
+ brw_MUL(p, dst, dst, w);
+ }
+ }
+ brw_set_saturate(p, 0);
}
-static void emit_sgt(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+static void emit_ddy(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
{
- emit_sop(c, inst, BRW_CONDITIONAL_G);
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg interp[4];
+ struct brw_reg dst;
+ struct brw_reg src0, w;
+ unsigned nr, i;
+
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+ nr = src0.nr;
+ w = get_src_reg(c, &inst->FullSrcRegisters[1], 3);
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+ brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, suboffset(interp[i], 1));
+ brw_MUL(p, dst, dst, w);
+ }
+ }
+ brw_set_saturate(p, 0);
}
-static void emit_sge(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+/* TODO
+ BIAS on SIMD8 not working yet...
+*/
+static void emit_txb(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
{
- emit_sop(c, inst, BRW_CONDITIONAL_GE);
-}
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_reg payload_reg = c->payload_depth[0];
+ struct brw_reg dst[4], src[4];
+ unsigned i;
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
-static void emit_seq(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_EQ);
-}
+#if 0
+ switch (inst->TexSrcTarget) {
+ case TEXTURE_1D_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ break;
+ default:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), src[2]);
+ break;
+ }
+#else
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+#endif
-static void emit_sne(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+ brw_MOV(p, brw_message_reg(5), src[3]);
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(payload_reg, BRW_REGISTER_TYPE_UW),
+ inst->TexSrcUnit + 1, /* surface */
+ inst->TexSrcUnit, /* sampler */
+ inst->FullDstRegisters[0].DstRegister.WriteMask,
+ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+ 4,
+ 4,
+ 0);
+#endif
}
-static void emit_ddx(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+static void emit_tex(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- unsigned nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
- nr = src0.nr;
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- brw_MOV(p, dst, interp[i]);
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_reg payload_reg = c->payload_depth[0];
+ struct brw_reg dst[4], src[4];
+ unsigned msg_len;
+ unsigned i, nr;
+ unsigned emit;
+ boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0;
+
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
-static void emit_ddy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- unsigned nr, i;
-
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- nr = src0.nr;
- w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- brw_MOV(p, dst, suboffset(interp[i], 1));
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
+#if 0
+ switch (inst->TexSrcTarget) {
+ case TEXTURE_1D_INDEX:
+ emit = WRITEMASK_X;
+ nr = 1;
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ emit = WRITEMASK_XY;
+ nr = 2;
+ break;
+ default:
+ emit = WRITEMASK_XYZ;
+ nr = 3;
+ break;
+ }
+#else
+ emit = WRITEMASK_XY;
+ nr = 2;
+#endif
-static void emit_wpos_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- unsigned mask = inst->DstReg.WriteMask;
- struct brw_reg src0[2], dst[2];
-
- dst[0] = get_dst_reg(c, inst, 0, 1);
- dst[1] = get_dst_reg(c, inst, 1, 1);
-
- src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1);
-
- /* Calc delta X,Y by subtracting origin in r1 from the pixel
- * centers.
- */
- if (mask & WRITEMASK_X) {
- brw_MOV(p,
- dst[0],
- retype(src0[0], BRW_REGISTER_TYPE_UW));
- }
-
- if (mask & WRITEMASK_Y) {
- /* TODO -- window_height - Y */
- brw_MOV(p,
- dst[1],
- retype(src0[1], BRW_REGISTER_TYPE_UW));
-
- }
+ msg_len = 1;
+
+ for (i = 0; i < nr; i++) {
+ static const unsigned swz[4] = {0,1,2,2};
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
+ else
+ brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
+ msg_len += 1;
+ }
+
+ if (shadow) {
+ brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(6), src[2]);
+ }
+
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(payload_reg, BRW_REGISTER_TYPE_UW),
+ inst->TexSrcUnit + 1, /* surface */
+ inst->TexSrcUnit, /* sampler */
+ inst->FullDstRegisters[0].DstRegister.WriteMask,
+ BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
+ 4,
+ shadow ? 6 : 4,
+ 0);
+
+ if (shadow)
+ brw_MOV(p, dst[3], brw_imm_f(1.0));
+#endif
}
-/* TODO
- BIAS on SIMD8 not workind yet...
- */
-static void emit_txb(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst[4], src[4], payload_reg;
- unsigned i;
- payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
- for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i, 1);
- for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
-
- switch (inst->TexSrcTarget) {
- case TEXTURE_1D_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
- break;
- case TEXTURE_2D_INDEX:
- case TEXTURE_RECT_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), src[1]);
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
- break;
- default:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), src[1]);
- brw_MOV(p, brw_message_reg(4), src[2]);
- break;
- }
- brw_MOV(p, brw_message_reg(5), src[3]);
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
- brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
- 1,
- retype(payload_reg, BRW_REGISTER_TYPE_UW),
- inst->TexSrcUnit + 1, /* surface */
- inst->TexSrcUnit, /* sampler */
- inst->DstReg.WriteMask,
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
- 4,
- 4,
- 0);
-}
-static void emit_tex(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst[4], src[4], payload_reg;
- unsigned msg_len;
- unsigned i, nr;
- unsigned emit;
- boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0;
- payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
- for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i, 1);
- for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- switch (inst->TexSrcTarget) {
- case TEXTURE_1D_INDEX:
- emit = WRITEMASK_X;
- nr = 1;
- break;
- case TEXTURE_2D_INDEX:
- case TEXTURE_RECT_INDEX:
- emit = WRITEMASK_XY;
- nr = 2;
- break;
- default:
- emit = WRITEMASK_XYZ;
- nr = 3;
- break;
- }
- msg_len = 1;
-
- for (i = 0; i < nr; i++) {
- static const unsigned swz[4] = {0,1,2,2};
- if (emit & (1<<i))
- brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
- else
- brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
- msg_len += 1;
- }
-
- if (shadow) {
- brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
- brw_MOV(p, brw_message_reg(6), src[2]);
- }
-
- brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
- 1,
- retype(payload_reg, BRW_REGISTER_TYPE_UW),
- inst->TexSrcUnit + 1, /* surface */
- inst->TexSrcUnit, /* sampler */
- inst->DstReg.WriteMask,
- BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
- 4,
- shadow ? 6 : 4,
- 0);
-
- if (shadow)
- brw_MOV(p, dst[3], brw_imm_f(1.0));
-}
-static void post_wm_emit( struct brw_wm_compile *c )
+
+static void emit_fb_write(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
{
- unsigned nr_insns = c->fp->program.Base.NumInstructions;
- unsigned insn, target_insn;
- struct prog_instruction *inst1, *inst2;
- struct brw_instruction *brw_inst1, *brw_inst2;
- int offset;
- for (insn = 0; insn < nr_insns; insn++) {
- inst1 = &c->fp->program.Base.Instructions[insn];
- brw_inst1 = inst1->Data;
- switch (inst1->Opcode) {
- case OPCODE_CAL:
- target_insn = inst1->BranchTarget;
- inst2 = &c->fp->program.Base.Instructions[target_insn];
- brw_inst2 = inst2->Data;
- offset = brw_inst2 - brw_inst1;
- brw_set_src1(brw_inst1, brw_imm_d(offset*16));
- break;
- default:
- break;
- }
- }
+ struct brw_compile *p = &c->func;
+ int nr = 2;
+ int channel;
+ int base_reg = 0;
+
+ // src0 = output color
+ // src1 = payload_depth[0]
+ // src2 = output depth
+ // dst = ???
+
+
+
+ /* Reserve a space for AA - may not be needed:
+ */
+ if (c->key.aa_dest_stencil_reg)
+ nr += 1;
+
+ {
+ brw_push_insn_state(p);
+ for (channel = 0; channel < 4; channel++) {
+ struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel];
+
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+ brw_MOV(p, brw_message_reg(nr + channel), src0);
+ }
+ /* skip over the regs populated above: */
+ nr += 8;
+ brw_pop_insn_state(p);
+ }
+
+
+ /* Pass through control information:
+ */
+ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+ brw_MOV(p,
+ brw_message_reg(base_reg + 1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+ }
+
+ /* Send framebuffer write message: */
+ brw_fb_WRITE(p,
+ retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ base_reg,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ 0, /* render surface always 0 */
+ nr,
+ 0,
+ 1);
+
}
-static void brw_wm_emit_glsl(struct brw_wm_compile *c)
+static void brw_wm_emit_instruction( struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst )
{
-#define MAX_IFSN 32
-#define MAX_LOOP_DEPTH 32
- struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
- struct brw_instruction *inst0, *inst1;
- int i, if_insn = 0, loop_insn = 0;
- struct brw_compile *p = &c->func;
- struct brw_indirect stack_index = brw_indirect(0, 0);
-
- brw_init_compile(&c->func);
- c->reg_index = 0;
- prealloc_reg(c);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- struct prog_instruction *inst = &c->prog_instructions[i];
- struct prog_instruction *orig_inst;
-
- if ((orig_inst = inst->Data) != 0)
- orig_inst->Data = current_insn(p);
-
- if (inst->CondUpdate)
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
- else
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
-
- switch (inst->Opcode) {
- case WM_PIXELXY:
- emit_pixel_xy(c, inst);
- break;
- case WM_DELTAXY:
- emit_delta_xy(c, inst);
- break;
- case WM_PIXELW:
- emit_pixel_w(c, inst);
- break;
- case WM_LINTERP:
- emit_linterp(c, inst);
- break;
- case WM_PINTERP:
- emit_pinterp(c, inst);
- break;
- case WM_CINTERP:
- emit_cinterp(c, inst);
- break;
- case WM_WPOSXY:
- emit_wpos_xy(c, inst);
- break;
- case WM_FB_WRITE:
- emit_fb_write(c, inst);
- break;
- case OPCODE_ABS:
- emit_abs(c, inst);
- break;
- case OPCODE_ADD:
- emit_add(c, inst);
- break;
- case OPCODE_SUB:
- emit_sub(c, inst);
- break;
- case OPCODE_FRC:
- emit_frc(c, inst);
- break;
- case OPCODE_FLR:
- emit_flr(c, inst);
- break;
- case OPCODE_LRP:
- emit_lrp(c, inst);
- break;
- case OPCODE_INT:
- emit_int(c, inst);
- break;
- case OPCODE_MOV:
- emit_mov(c, inst);
- break;
- case OPCODE_DP3:
- emit_dp3(c, inst);
- break;
- case OPCODE_DP4:
- emit_dp4(c, inst);
- break;
- case OPCODE_XPD:
- emit_xpd(c, inst);
- break;
- case OPCODE_DPH:
- emit_dph(c, inst);
- break;
- case OPCODE_RCP:
- emit_rcp(c, inst);
- break;
- case OPCODE_RSQ:
- emit_rsq(c, inst);
- break;
- case OPCODE_SIN:
- emit_sin(c, inst);
- break;
- case OPCODE_COS:
- emit_cos(c, inst);
- break;
- case OPCODE_EX2:
- emit_ex2(c, inst);
- break;
- case OPCODE_LG2:
- emit_lg2(c, inst);
- break;
- case OPCODE_MAX:
- emit_max(c, inst);
- break;
- case OPCODE_MIN:
- emit_min(c, inst);
- break;
- case OPCODE_DDX:
- emit_ddx(c, inst);
- break;
- case OPCODE_DDY:
- emit_ddy(c, inst);
- break;
- case OPCODE_SLT:
- emit_slt(c, inst);
- break;
- case OPCODE_SLE:
- emit_sle(c, inst);
- break;
- case OPCODE_SGT:
- emit_sgt(c, inst);
- break;
- case OPCODE_SGE:
- emit_sge(c, inst);
- break;
- case OPCODE_SEQ:
- emit_seq(c, inst);
- break;
- case OPCODE_SNE:
- emit_sne(c, inst);
- break;
- case OPCODE_MUL:
- emit_mul(c, inst);
- break;
- case OPCODE_POW:
- emit_pow(c, inst);
- break;
- case OPCODE_MAD:
- emit_mad(c, inst);
- break;
- case OPCODE_TEX:
- emit_tex(c, inst);
- break;
- case OPCODE_TXB:
- emit_txb(c, inst);
- break;
- case OPCODE_KIL_NV:
- emit_kil(c);
- break;
- case OPCODE_IF:
- assert(if_insn < MAX_IFSN);
- if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
- break;
- case OPCODE_ELSE:
- if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
- break;
- case OPCODE_ENDIF:
- assert(if_insn > 0);
- brw_ENDIF(p, if_inst[--if_insn]);
- break;
- case OPCODE_BGNSUB:
- case OPCODE_ENDSUB:
- break;
- case OPCODE_CAL:
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(4));
- orig_inst = inst->Data;
- orig_inst->Data = &p->store[p->nr_insn];
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- brw_pop_insn_state(p);
- break;
-
- case OPCODE_RET:
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(-4));
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_pop_insn_state(p);
-
- break;
- case OPCODE_BGNLOOP:
- loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
- break;
- case OPCODE_BRK:
- brw_BREAK(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_CONT:
- brw_CONT(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_ENDLOOP:
- loop_insn--;
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
- /* patch all the BREAK instructions from
- last BEGINLOOP */
- while (inst0 > loop_inst[loop_insn]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
- inst0->bits3.if_else.pop_count = 0;
- } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = inst1 - inst0;
- inst0->bits3.if_else.pop_count = 0;
- }
- }
- break;
- default:
- _mesa_printf("unsupported IR in fragment shader %d\n",
- inst->Opcode);
- }
- if (inst->CondUpdate)
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- else
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- }
- post_wm_emit(c);
- for (i = 0; i < c->fp->program.Base.NumInstructions; i++)
- c->fp->program.Base.Instructions[i].Data = NULL;
+ struct brw_compile *p = &c->func;
+
+#if 0
+ if (inst->CondUpdate)
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+#else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+#endif
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ emit_abs(c, inst);
+ break;
+ case TGSI_OPCODE_ADD:
+ emit_alu2(c, inst, BRW_OPCODE_ADD);
+ break;
+ case TGSI_OPCODE_SUB:
+ assert(0);
+// emit_alu2(c, inst, BRW_OPCODE_SUB);
+ break;
+ case TGSI_OPCODE_FRC:
+ emit_alu1(c, inst, BRW_OPCODE_FRC);
+ break;
+ case TGSI_OPCODE_FLR:
+ assert(0);
+// emit_alu1(c, inst, BRW_OPCODE_FLR);
+ break;
+ case TGSI_OPCODE_LRP:
+ emit_lrp(c, inst);
+ break;
+ case TGSI_OPCODE_INT:
+ emit_alu1(c, inst, BRW_OPCODE_RNDD);
+ break;
+ case TGSI_OPCODE_MOV:
+ emit_alu1(c, inst, BRW_OPCODE_MOV);
+ break;
+ case TGSI_OPCODE_DP3:
+ emit_dp3(c, inst);
+ break;
+ case TGSI_OPCODE_DP4:
+ emit_dp4(c, inst);
+ break;
+ case TGSI_OPCODE_XPD:
+ emit_xpd(c, inst);
+ break;
+ case TGSI_OPCODE_DPH:
+ emit_dph(c, inst);
+ break;
+ case TGSI_OPCODE_RCP:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
+ break;
+ case TGSI_OPCODE_RSQ:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+ break;
+ case TGSI_OPCODE_SIN:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
+ break;
+ case TGSI_OPCODE_COS:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+ break;
+ case TGSI_OPCODE_EX2:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
+ break;
+ case TGSI_OPCODE_LG2:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
+ break;
+ case TGSI_OPCODE_MAX:
+ emit_max(c, inst);
+ break;
+ case TGSI_OPCODE_MIN:
+ emit_min(c, inst);
+ break;
+ case TGSI_OPCODE_DDX:
+ emit_ddx(c, inst);
+ break;
+ case TGSI_OPCODE_DDY:
+ emit_ddy(c, inst);
+ break;
+ case TGSI_OPCODE_SLT:
+ emit_sop(c, inst, BRW_CONDITIONAL_L);
+ break;
+ case TGSI_OPCODE_SLE:
+ emit_sop(c, inst, BRW_CONDITIONAL_LE);
+ break;
+ case TGSI_OPCODE_SGT:
+ emit_sop(c, inst, BRW_CONDITIONAL_G);
+ break;
+ case TGSI_OPCODE_SGE:
+ emit_sop(c, inst, BRW_CONDITIONAL_GE);
+ break;
+ case TGSI_OPCODE_SEQ:
+ emit_sop(c, inst, BRW_CONDITIONAL_EQ);
+ break;
+ case TGSI_OPCODE_SNE:
+ emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+ break;
+ case TGSI_OPCODE_MUL:
+ emit_alu2(c, inst, BRW_OPCODE_MUL);
+ break;
+ case TGSI_OPCODE_POW:
+ emit_pow(c, inst);
+ break;
+ case TGSI_OPCODE_MAD:
+ emit_mad(c, inst);
+ break;
+ case TGSI_OPCODE_TEX:
+ emit_tex(c, inst);
+ break;
+ case TGSI_OPCODE_TXB:
+ emit_txb(c, inst);
+ break;
+ case TGSI_OPCODE_TEXKILL:
+ emit_kil(c);
+ break;
+ case TGSI_OPCODE_IF:
+ assert(c->if_insn < MAX_IFSN);
+ c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ break;
+ case TGSI_OPCODE_ELSE:
+ c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ assert(c->if_insn > 0);
+ brw_ENDIF(p, c->if_inst[--c->if_insn]);
+ break;
+ case TGSI_OPCODE_BGNSUB:
+ case TGSI_OPCODE_ENDSUB:
+ break;
+ case TGSI_OPCODE_CAL:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p,
+ get_addr_reg(c->stack_index),
+ get_addr_reg(c->stack_index), brw_imm_d(4));
+// orig_inst = inst->Data;
+// orig_inst->Data = &p->store[p->nr_insn];
+ assert(0);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_pop_insn_state(p);
+ break;
+
+ case TGSI_OPCODE_RET:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD(p,
+ get_addr_reg(c->stack_index),
+ get_addr_reg(c->stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_pop_insn_state(p);
+
+ break;
+ case TGSI_OPCODE_LOOP:
+ c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case TGSI_OPCODE_BRK:
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_CONT:
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ c->loop_insn--;
+ c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]);
+ /* patch the jump counts of all BREAK and CONTINUE instructions
+ emitted since the matching DO */
+ while (c->inst0 > c->loop_inst[c->loop_insn]) {
+ c->inst0--;
+ if (c->inst0->header.opcode == BRW_OPCODE_BREAK) {
+ c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1;
+ c->inst0->bits3.if_else.pop_count = 0;
+ } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0;
+ c->inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ break;
+ case TGSI_OPCODE_END:
+ emit_fb_write(c, inst);
+ break;
+
+ default:
+ _mesa_printf("unsupported IR in fragment shader %d\n",
+ inst->Instruction.Opcode);
+ }
+#if 0
+ if (inst->CondUpdate)
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ else
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+#endif
}
+
+
+
+
+
void brw_wm_glsl_emit(struct brw_wm_compile *c)
{
- brw_wm_pass_fp(c);
- c->tmp_index = 127;
- brw_wm_emit_glsl(c);
- c->prog_data.total_grf = c->reg_index;
- c->prog_data.total_scratch = 0;
-}
+ struct tgsi_parse_context parse;
+ struct brw_compile *p = &c->func;
+
+ brw_init_compile(&c->func);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ c->reg_index = 0;
+ c->if_insn = 0;
+ c->loop_insn = 0;
+ c->stack_index = brw_indirect(0,0);
+
+ /* Do static register allocation and parameter interpolation:
+ */
+ brw_wm_emit_decls( c );
+
+ /* Emit the actual program. All done with very direct translation,
+ * hopefully we can improve on this shortly...
+ */
+ brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
+
+ tgsi_parse_init( &parse, c->fp->program.tokens );
+
+ while( !tgsi_parse_end_of_tokens( &parse ) )
+ {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ /* already done */
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* not handled yet */
+ assert(0);
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction);
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+
+ tgsi_parse_free (&parse);
+
+ /* Fix up call targets (currently compiled out with #if 0):
+ */
+#if 0
+ {
+ unsigned nr_insns = c->fp->program.Base.NumInstructions;
+ unsigned insn, target_insn;
+ struct tgsi_full_instruction *inst1, *inst2;
+ struct brw_instruction *brw_inst1, *brw_inst2;
+ int offset;
+ for (insn = 0; insn < nr_insns; insn++) {
+ inst1 = &c->fp->program.Base.Instructions[insn];
+ brw_inst1 = inst1->Data;
+ switch (inst1->Opcode) {
+ case TGSI_OPCODE_CAL:
+ target_insn = inst1->BranchTarget;
+ inst2 = &c->fp->program.Base.Instructions[target_insn];
+ brw_inst2 = inst2->Data;
+ offset = brw_inst2 - brw_inst1;
+ brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+ break;
+ default:
+ break;
+ }
+ }
+ }
#endif
+
+ c->prog_data.total_grf = c->reg_index;
+ c->prog_data.total_scratch = 0;
+}
diff --git a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c
index 4ea0dd7db0..fbeea8c809 100644
--- a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c
+++ b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c
@@ -229,12 +229,12 @@ static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_samp
* complicates various things. However, this is still too confusing -
* FIXME: simplify all the different new texture state flags.
*/
-void brw_upload_wm_samplers(struct brw_context *brw)
+static void upload_wm_samplers(struct brw_context *brw)
{
unsigned unit;
unsigned sampler_count = 0;
- /* _NEW_TEXTURE */
+ /* BRW_NEW_SAMPLER */
for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
if (brw->attribs.Samplers[unit]) { /* FIXME: correctly detect enabled ones */
const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit];
@@ -262,14 +262,11 @@ void brw_upload_wm_samplers(struct brw_context *brw)
sizeof(struct brw_sampler_state) * brw->wm.sampler_count);
}
-#if 0
const struct brw_tracked_state brw_wm_samplers = {
.dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = 0,
+ .brw = BRW_NEW_SAMPLER,
.cache = 0
},
.update = upload_wm_samplers
};
-#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_state.c b/src/mesa/pipe/i965simple/brw_wm_state.c
index b45fb2f56b..52d2c85423 100644
--- a/src/mesa/pipe/i965simple/brw_wm_state.c
+++ b/src/mesa/pipe/i965simple/brw_wm_state.c
@@ -34,15 +34,13 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
+#include "pipe/p_util.h"
/***********************************************************************
* WM unit - fragment programs and rasterization
*/
-
-#if 0
static void upload_wm_unit(struct brw_context *brw )
{
- struct intel_context *intel = &brw->intel;
struct brw_wm_unit_state wm;
unsigned max_threads;
unsigned per_thread;
@@ -56,7 +54,7 @@ static void upload_wm_unit(struct brw_context *brw )
memset(&wm, 0, sizeof(wm));
/* CACHE_NEW_WM_PROG */
- wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
+ wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1;
wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6;
wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
@@ -64,9 +62,10 @@ static void upload_wm_unit(struct brw_context *brw )
wm.wm5.max_threads = max_threads;
- per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+ per_thread = align(brw->wm.prog_data->total_scratch, 1024);
assert(per_thread <= 12 * 1024);
+#if 0
if (brw->wm.prog_data->total_scratch) {
unsigned total = per_thread * (max_threads + 1);
@@ -95,6 +94,7 @@ static void upload_wm_unit(struct brw_context *brw )
* so just fail for now if we hit that path.
*/
assert(brw->wm.prog_data->total_scratch == 0);
+#endif
/* CACHE_NEW_SURFACE */
wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
@@ -112,23 +112,20 @@ static void upload_wm_unit(struct brw_context *brw )
/* BRW_NEW_FRAGMENT_PROGRAM */
{
- const struct gl_fragment_program *fp = brw->fragment_program;
+ const struct brw_fragment_program *fp = brw->attribs.FragmentProgram;
- if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS))
+ if (fp->UsesDepth)
wm.wm5.program_uses_depth = 1; /* as far as we can tell */
- if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))
+ if (fp->ComputesDepth)
wm.wm5.program_computes_depth = 1;
- /* _NEW_COLOR */
+ /* BRW_NEW_ALPHA_TEST */
if (fp->UsesKill ||
- brw->attribs.Color->AlphaEnabled)
+ brw->attribs.AlphaTest->enabled)
wm.wm5.program_uses_killpixel = 1;
- if (brw_wm_is_glsl(fp))
- wm.wm5.enable_8_pix = 1;
- else
- wm.wm5.enable_16_pix = 1;
+ wm.wm5.enable_8_pix = 1;
}
wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
@@ -138,11 +135,11 @@ static void upload_wm_unit(struct brw_context *brw )
wm.wm5.line_aa_region_width = 0;
wm.wm5.line_endcap_aa_region_width = 1;
- /* _NEW_POLYGONSTIPPLE */
- if (brw->attribs.Polygon->StippleFlag)
+ /* BRW_NEW_RASTERIZER */
+ if (brw->attribs.Raster->poly_stipple_enable)
wm.wm5.polygon_stipple = 1;
- /* _NEW_POLYGON */
+#if 0
if (brw->attribs.Polygon->OffsetFill) {
wm.wm5.depth_offset = 1;
/* Something wierd going on with legacy_global_depth_bias,
@@ -156,13 +153,13 @@ static void upload_wm_unit(struct brw_context *brw )
*/
wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor;
}
+#endif
- /* _NEW_LINE */
- if (brw->attribs.Line->StippleFlag) {
+ if (brw->attribs.Raster->line_stipple_enable) {
wm.wm5.line_stipple = 1;
}
- if (BRW_DEBUG & DEBUG_STATS || intel->stats_wm)
+ if (BRW_DEBUG & DEBUG_STATS)
wm.wm4.stats_enable = 1;
brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );
@@ -183,14 +180,10 @@ static void upload_wm_unit(struct brw_context *brw )
const struct brw_tracked_state brw_wm_unit = {
.dirty = {
- .mesa = (_NEW_POLYGON |
- _NEW_POLYGONSTIPPLE |
- _NEW_LINE |
- _NEW_COLOR),
-
- .brw = (BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_CURBE_OFFSETS |
- BRW_NEW_LOCK),
+ .brw = (BRW_NEW_RASTERIZER |
+ BRW_NEW_ALPHA_TEST |
+ BRW_NEW_FS |
+ BRW_NEW_CURBE_OFFSETS),
.cache = (CACHE_NEW_SURFACE |
CACHE_NEW_WM_PROG |
@@ -199,4 +192,3 @@ const struct brw_tracked_state brw_wm_unit = {
.update = upload_wm_unit
};
-#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_surface_state.c b/src/mesa/pipe/i965simple/brw_wm_surface_state.c
index 844cfc54a9..6e68c4c660 100644
--- a/src/mesa/pipe/i965simple/brw_wm_surface_state.c
+++ b/src/mesa/pipe/i965simple/brw_wm_surface_state.c
@@ -33,8 +33,6 @@
#include "brw_state.h"
#include "brw_defines.h"
-
-#if 0
static unsigned translate_tex_target( int target )
{
switch (target) {
@@ -75,13 +73,13 @@ static unsigned translate_tex_format( unsigned mesa_format )
assert(0); /* not supported for sampling */
return BRW_SURFACEFORMAT_R8G8B8_UNORM;
- case PIPE_FORMAT_U_A8_R8_G8_B8:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- case PIPE_FORMAT_RGBA8888_REV:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
- case PIPE_FORMAT_U_R5_G6_B5:
+ case PIPE_FORMAT_R5G6B5_UNORM:
return BRW_SURFACEFORMAT_B5G6R5_UNORM;
case PIPE_FORMAT_A1R5G5B5_UNORM:
@@ -95,14 +93,15 @@ static unsigned translate_tex_format( unsigned mesa_format )
case PIPE_FORMAT_YCBCR:
return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
-
+#if 0
case PIPE_FORMAT_RGB_FXT1:
case PIPE_FORMAT_RGBA_FXT1:
return BRW_SURFACEFORMAT_FXT1;
+#endif
case PIPE_FORMAT_Z16_UNORM:
return BRW_SURFACEFORMAT_I16_UNORM;
-
+#if 0
case PIPE_FORMAT_RGB_DXT1:
return BRW_SURFACEFORMAT_DXT1_RGB;
@@ -119,6 +118,7 @@ static unsigned translate_tex_format( unsigned mesa_format )
return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB;
case PIPE_FORMAT_SRGB_DXT1:
return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+#endif
default:
assert(0);
@@ -126,41 +126,46 @@ static unsigned translate_tex_format( unsigned mesa_format )
}
}
+/**
+ * Ask the winsys for the offset of a buffer.
+ * Forwards to brw->winsys->get_buffer_offset() with a literal 0 as the
+ * final argument and returns whatever the winsys reports.
+ * NOTE(review): the meaning of that trailing 0 is not visible from this
+ * hunk -- confirm against the brw_winsys interface.
+ */
+static unsigned brw_buffer_offset(struct brw_context *brw,
+ struct pipe_buffer_handle *buffer)
+{
+ return brw->winsys->get_buffer_offset(brw->winsys,
+ buffer,
+ 0);
+}
+
static
-void brw_update_texture_surface( GLcontext *ctx,
+void brw_update_texture_surface( struct brw_context *brw,
unsigned unit )
{
- struct brw_context *brw = brw_context(ctx);
- struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current;
- struct intel_texture_object *intelObj = intel_texture_object(tObj);
- struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+ const struct brw_texture *tObj = brw->attribs.Texture[unit];
struct brw_surface_state surf;
memset(&surf, 0, sizeof(surf));
surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
- surf.ss0.surface_type = translate_tex_target(tObj->Target);
- surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat);
+ surf.ss0.surface_type = translate_tex_target(tObj->base.target);
+ surf.ss0.surface_format = translate_tex_format(tObj->base.format);
/* This is ok for all textures with channel width 8bit or less:
*/
/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
/* Updated in emit_reloc */
- surf.ss1.base_addr = brw_buffer_offset( intelObj->mt->region->buffer );
+ surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer );
- surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
- surf.ss2.width = firstImage->Width - 1;
- surf.ss2.height = firstImage->Height - 1;
+ surf.ss2.mip_count = tObj->base.last_level - tObj->base.first_level;
+ surf.ss2.width = tObj->base.width[0];
+ surf.ss2.height = tObj->base.height[0];
surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
- surf.ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */
- surf.ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1;
- surf.ss3.depth = firstImage->Depth - 1;
+ surf.ss3.tiled_surface = 0; /* always zero */
+ surf.ss3.pitch = tObj->pitch;
+ surf.ss3.depth = tObj->base.depth[0];
surf.ss4.min_lod = 0;
- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+ if (tObj->base.target == PIPE_TEXTURE_CUBE) {
surf.ss0.cube_pos_x = 1;
surf.ss0.cube_pos_y = 1;
surf.ss0.cube_pos_z = 1;
@@ -180,13 +185,14 @@ void brw_update_texture_surface( GLcontext *ctx,
static void upload_wm_surfaces(struct brw_context *brw )
{
- GLcontext *ctx = &brw->intel.ctx;
- struct intel_context *intel = &brw->intel;
unsigned i;
{
struct brw_surface_state surf;
- struct intel_region *region = brw->state.draw_region;
+
+ /* BRW_NEW_FRAMEBUFFER
+ */
+ struct pipe_surface *region = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/
memset(&surf, 0, sizeof(surf));
@@ -198,27 +204,27 @@ static void upload_wm_surfaces(struct brw_context *brw )
surf.ss0.surface_type = BRW_SURFACE_2D;
- surf.ss1.base_addr = brw_buffer_offset( region->buffer );
+ surf.ss1.base_addr = brw_buffer_offset( brw, region->buffer );
- surf.ss2.width = region->pitch - 1; /* XXX: not really! */
- surf.ss2.height = region->height - 1;
+ surf.ss2.width = region->width;
+ surf.ss2.height = region->height;
surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
- surf.ss3.tiled_surface = region->tiled;
- surf.ss3.pitch = (region->pitch * region->cpp) - 1;
+ surf.ss3.tiled_surface = 0;
+ surf.ss3.pitch = region->pitch;
} else {
surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
surf.ss0.surface_type = BRW_SURFACE_NULL;
}
- /* _NEW_COLOR */
- surf.ss0.color_blend = (!brw->attribs.Color->_LogicOpEnabled &&
- brw->attribs.Color->BlendEnabled);
+ /* BRW_NEW_BLEND */
+ surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable &&
+ brw->attribs.Blend->blend_enable);
- surf.ss0.writedisable_red = !brw->attribs.Color->ColorMask[0];
- surf.ss0.writedisable_green = !brw->attribs.Color->ColorMask[1];
- surf.ss0.writedisable_blue = !brw->attribs.Color->ColorMask[2];
- surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3];
+ surf.ss0.writedisable_red = !brw->attribs.BlendColor.color[0];
+ surf.ss0.writedisable_green = !brw->attribs.BlendColor.color[1];
+ surf.ss0.writedisable_blue = !brw->attribs.BlendColor.color[2];
+ surf.ss0.writedisable_alpha = !brw->attribs.BlendColor.color[3];
@@ -230,23 +236,24 @@ static void upload_wm_surfaces(struct brw_context *brw )
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
- struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i];
+ const struct brw_texture *texUnit = brw->attribs.Texture[i];
- /* _NEW_TEXTURE, BRW_NEW_TEXDATA
+ /* BRW_NEW_TEXTURE
*/
- if (texUnit->_ReallyEnabled &&
- intel_finalize_mipmap_tree(intel, i)) {
+ if (texUnit->base.refcount/*(texUnit->refcount > 0) == really used */) {
- brw_update_texture_surface(ctx, i);
+ brw_update_texture_surface(brw, i);
brw->wm.nr_surfaces = i+2;
}
- else if( texUnit->_ReallyEnabled &&
+#if 0
+ else if( texUnit->refcount &&
texUnit->_Current == intel->frame_buffer_texobj )
{
brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0];
brw->wm.nr_surfaces = i+2;
}
+#endif
else {
brw->wm.bind.surf_ss_offset[i+1] = 0;
}
@@ -293,14 +300,12 @@ static void emit_reloc_wm_surfaces(struct brw_context *brw)
}
#endif
-
-
+/* Tracked-state entry whose update hook is upload_wm_surfaces.
+ * After this change only brw-level dirty flags are listed (framebuffer,
+ * blend, texture); the .mesa flags are removed and .cache stays 0.
+ */
const struct brw_tracked_state brw_wm_surfaces = {
.dirty = {
- .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
- .brw = BRW_NEW_CONTEXT,
+ .brw = (BRW_NEW_FRAMEBUFFER |
+ BRW_NEW_BLEND |
+ BRW_NEW_TEXTURE),
.cache = 0
},
.update = upload_wm_surfaces,
};
-#endif
diff --git a/src/mesa/pipe/p_state.h b/src/mesa/pipe/p_state.h
index 4e42838f1d..af65d365bf 100644
--- a/src/mesa/pipe/p_state.h
+++ b/src/mesa/pipe/p_state.h
@@ -94,6 +94,7 @@ struct pipe_rasterizer_state
unsigned line_stipple_factor:8; /**< [1..256] actually */
unsigned line_stipple_pattern:16;
unsigned bypass_clipping:1;
+ unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */
float line_width;
float point_size; /**< used when no per-vertex size */
@@ -139,6 +140,7 @@ struct pipe_shader_state {
const struct tgsi_token *tokens;
ubyte num_inputs;
ubyte num_outputs;
+ ubyte input_map[PIPE_MAX_SHADER_INPUTS]; /* XXX this may be temporary */
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
diff --git a/src/mesa/pipe/p_util.h b/src/mesa/pipe/p_util.h
index e6d284d932..46edcf3075 100644
--- a/src/mesa/pipe/p_util.h
+++ b/src/mesa/pipe/p_util.h
@@ -360,6 +360,11 @@ static INLINE float LOG2(float val)
#define CEILF(x) ((float) ceil(x))
#endif
+/**
+ * Round value up to a multiple of alignment using add-and-mask.
+ * The mask ~(alignment - 1) only yields a true multiple when alignment
+ * is a power of two; other alignments give a masked, not rounded, result.
+ */
+static INLINE int align(int value, int alignment)
+{
+ return (value + alignment - 1) & ~(alignment - 1);
+}
+
/* Convenient...
*/
extern void _mesa_printf(const char *str, ...);
diff --git a/src/mesa/pipe/softpipe/sp_context.c b/src/mesa/pipe/softpipe/sp_context.c
index dcf0444f6e..b62e691e87 100644
--- a/src/mesa/pipe/softpipe/sp_context.c
+++ b/src/mesa/pipe/softpipe/sp_context.c
@@ -55,8 +55,6 @@ static boolean
softpipe_is_format_supported( struct pipe_context *pipe,
enum pipe_format format, uint type )
{
- struct softpipe_context *softpipe = softpipe_context( pipe );
-
switch (type) {
case PIPE_TEXTURE:
/* softpipe supports all texture formats */
diff --git a/src/mesa/pipe/softpipe/sp_context.h b/src/mesa/pipe/softpipe/sp_context.h
index 2c038de5f7..8fd44933f2 100644
--- a/src/mesa/pipe/softpipe/sp_context.h
+++ b/src/mesa/pipe/softpipe/sp_context.h
@@ -110,8 +110,6 @@ struct softpipe_context {
struct vertex_info vertex_info;
unsigned attr_mask;
unsigned nr_frag_attrs; /**< number of active fragment attribs */
- boolean need_z; /**< produce quad/fragment Z values? */
- boolean need_w; /**< produce quad/fragment W values? */
int psize_slot;
#if 0
diff --git a/src/mesa/pipe/softpipe/sp_headers.h b/src/mesa/pipe/softpipe/sp_headers.h
index b9f2b2205a..0ae31d8796 100644
--- a/src/mesa/pipe/softpipe/sp_headers.h
+++ b/src/mesa/pipe/softpipe/sp_headers.h
@@ -73,6 +73,7 @@ struct quad_header {
float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */
const struct tgsi_interp_coef *coef;
+ const struct tgsi_interp_coef *posCoef;
unsigned nr_attrs;
};
diff --git a/src/mesa/pipe/softpipe/sp_prim_setup.c b/src/mesa/pipe/softpipe/sp_prim_setup.c
index fc96f92af1..2ccf5e2624 100644
--- a/src/mesa/pipe/softpipe/sp_prim_setup.c
+++ b/src/mesa/pipe/softpipe/sp_prim_setup.c
@@ -36,10 +36,12 @@
#include "sp_context.h"
#include "sp_headers.h"
#include "sp_quad.h"
+#include "sp_state.h"
#include "sp_prim_setup.h"
#include "pipe/draw/draw_private.h"
#include "pipe/draw/draw_vertex.h"
#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
#define DEBUG_VERTS 0
@@ -80,8 +82,11 @@ struct setup_stage {
float oneoverarea;
struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
+ struct tgsi_interp_coef posCoef; /* For Z, W */
struct quad_header quad;
+ uint firstFpInput; /** Semantic type of first frag input */
+
struct {
int left[2]; /**< [0] = row0, [1] = row1 */
int right[2];
@@ -365,18 +370,17 @@ static boolean setup_sort_vertices( struct setup_stage *setup,
* \param i which component of the slot (0..3)
*/
static void const_coeff( struct setup_stage *setup,
- unsigned slot,
- unsigned i )
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
{
+ /* Writes channel i of the caller-supplied coef: both gradients are
+ * zeroed and a0 is read from vertSlot of setup->vprovoke, so the
+ * destination no longer has to be setup->coef[vertSlot].
+ */
- assert(slot < PIPE_MAX_SHADER_INPUTS);
assert(i <= 3);
- setup->coef[slot].dadx[i] = 0;
- setup->coef[slot].dady[i] = 0;
+ coef->dadx[i] = 0;
+ coef->dady[i] = 0;
/* need provoking vertex info!
*/
- setup->coef[slot].a0[i] = setup->vprovoke->data[slot][i];
+ coef->a0[i] = setup->vprovoke->data[vertSlot][i];
}
@@ -385,19 +389,20 @@ static void const_coeff( struct setup_stage *setup,
* for a triangle.
*/
static void tri_linear_coeff( struct setup_stage *setup,
- unsigned slot,
- unsigned i)
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
{
- float botda = setup->vmid->data[slot][i] - setup->vmin->data[slot][i];
- float majda = setup->vmax->data[slot][i] - setup->vmin->data[slot][i];
+ float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
+ float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
-
- assert(slot < PIPE_MAX_SHADER_INPUTS);
+ float dadx = a * setup->oneoverarea;
+ float dady = b * setup->oneoverarea;
+
assert(i <= 3);
- setup->coef[slot].dadx[i] = a * setup->oneoverarea;
- setup->coef[slot].dady[i] = b * setup->oneoverarea;
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
/* calculate a0 as the value which would be sampled for the
* fragment at (0,0), taking into account that we want to sample at
@@ -411,9 +416,9 @@ static void tri_linear_coeff( struct setup_stage *setup,
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
- setup->coef[slot].a0[i] = (setup->vmin->data[slot][i] -
- (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) +
- setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f)));
+ coef->a0[i] = (setup->vmin->data[vertSlot][i] -
+ (dadx * (setup->vmin->data[0][0] - 0.5f) +
+ dady * (setup->vmin->data[0][1] - 0.5f)));
/*
_mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -434,76 +439,144 @@ static void tri_linear_coeff( struct setup_stage *setup,
* divide the interpolated value by the interpolated W at that fragment.
*/
static void tri_persp_coeff( struct setup_stage *setup,
- unsigned slot,
- unsigned i )
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
{
- /* premultiply by 1/w:
+ /* premultiply by 1/w (v->data[0][3] is always W):
*/
- float mina = setup->vmin->data[slot][i] * setup->vmin->data[0][3];
- float mida = setup->vmid->data[slot][i] * setup->vmid->data[0][3];
- float maxa = setup->vmax->data[slot][i] * setup->vmax->data[0][3];
-
+ float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3];
+ float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3];
+ float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3];
float botda = mida - mina;
float majda = maxa - mina;
float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
+ /* Gradients of the premultiplied attribute; kept in locals so the a0
+ * expression below reuses them instead of re-reading coef.
+ */
+ float dadx = a * setup->oneoverarea;
+ float dady = b * setup->oneoverarea;
/*
- printf("tri persp %d,%d: %f %f %f\n", slot, i,
- setup->vmin->data[slot][i],
- setup->vmid->data[slot][i],
- setup->vmax->data[slot][i]
+ printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
+ setup->vmin->data[vertSlot][i],
+ setup->vmid->data[vertSlot][i],
+ setup->vmax->data[vertSlot][i]
);
*/
-
- assert(slot < PIPE_MAX_SHADER_INPUTS);
assert(i <= 3);
- setup->coef[slot].dadx[i] = a * setup->oneoverarea;
- setup->coef[slot].dady[i] = b * setup->oneoverarea;
- setup->coef[slot].a0[i] = (mina -
- (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) +
- setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f)));
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
+ /* a0 starts from the premultiplied vmin value and steps back by vmin's
+ * position (data[0][0], data[0][1]) less a 0.5 offset in x and y.
+ */
+ coef->a0[i] = (mina -
+ (dadx * (setup->vmin->data[0][0] - 0.5f) +
+ dady * (setup->vmin->data[0][1] - 0.5f)));
}
/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial, though Y has to be inverted for OpenGL.
+ * Z and W are copied from posCoef which should have already been computed.
+ * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
+ */
+static void
+setup_fragcoord_coeff(struct setup_stage *setup)
+{
+ /* Every store below targets setup->coef[0]; the function takes no slot
+ * argument, so it always fills fragment input 0.
+ */
+ /*X*/
+ setup->coef[0].a0[0] = 0;
+ setup->coef[0].dadx[0] = 1.0;
+ setup->coef[0].dady[0] = 0.0;
+ /*Y*/
+ if (setup->softpipe->rasterizer->origin_lower_left) {
+ /* y=0=bottom */
+ /* Flip: value starts at height-1 and decreases by one per row.
+ * NOTE(review): cbufs[0] is dereferenced unconditionally here --
+ * confirm a colour buffer is always bound when this runs.
+ */
+ const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height;
+ setup->coef[0].a0[1] = winHeight - 1;
+ setup->coef[0].dady[1] = -1.0;
+ }
+ else {
+ /* y=0=top */
+ setup->coef[0].a0[1] = 0.0;
+ setup->coef[0].dady[1] = 1.0;
+ }
+ setup->coef[0].dadx[1] = 0.0;
+ /*Z*/
+ /* Z and W are straight copies of the matching posCoef channels. */
+ setup->coef[0].a0[2] = setup->posCoef.a0[2];
+ setup->coef[0].dadx[2] = setup->posCoef.dadx[2];
+ setup->coef[0].dady[2] = setup->posCoef.dady[2];
+ /*w*/
+ setup->coef[0].a0[3] = setup->posCoef.a0[3];
+ setup->coef[0].dadx[3] = setup->posCoef.dadx[3];
+ setup->coef[0].dady[3] = setup->posCoef.dady[3];
+}
+
+
+
+/**
* Compute the setup->coef[] array dadx, dady, a0 values.
* Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
*/
static void setup_tri_coefficients( struct setup_stage *setup )
{
const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode;
- unsigned slot, j;
+#define USE_INPUT_MAP 0
+ /* With USE_INPUT_MAP at 0 the fs->input_map lookup further down is
+ * compiled out and the firstFpInput-based mapping is used instead.
+ */
+#if USE_INPUT_MAP
+ const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
+#endif
+ uint fragSlot;
/* z and w are done by linear interpolation:
*/
- tri_linear_coeff(setup, 0, 2);
- tri_linear_coeff(setup, 0, 3);
+ /* Z and W now land in the dedicated posCoef, not in coef[0]. */
+ tri_linear_coeff(setup, &setup->posCoef, 0, 2);
+ tri_linear_coeff(setup, &setup->posCoef, 0, 3);
/* setup interpolation for all the remaining attributes:
*/
- for (slot = 1; slot < setup->quad.nr_attrs; slot++) {
- switch (interp[slot]) {
- case INTERP_CONSTANT:
- for (j = 0; j < NUM_CHANNELS; j++)
- const_coeff(setup, slot, j);
- break;
-
- case INTERP_LINEAR:
- for (j = 0; j < NUM_CHANNELS; j++)
- tri_linear_coeff(setup, slot, j);
- break;
-
- case INTERP_PERSPECTIVE:
- for (j = 0; j < NUM_CHANNELS; j++)
- tri_persp_coeff(setup, slot, j);
- break;
-
- default:
- /* invalid interp mode */
- assert(0);
+ for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) {
+ /* which vertex output maps to this fragment input: */
+#if !USE_INPUT_MAP
+ uint vertSlot;
+ if (setup->firstFpInput == TGSI_SEMANTIC_POSITION) {
+ /* Fragment input 0 is the position: it gets the fragcoord
+ * coefficients and the other inputs map 1:1 to vertex slots.
+ */
+ if (fragSlot == 0) {
+ setup_fragcoord_coeff(setup);
+ continue;
+ }
+ vertSlot = fragSlot;
+ }
+ else {
+ /* Position is not a fragment input: shift by one so that
+ * fragment input N reads vertex slot N+1.
+ */
+ vertSlot = fragSlot + 1;
}
+
+#else
+ uint vertSlot = fs->input_map[fragSlot];
+
+ if (vertSlot == 0) {
+ /* special case: shader is reading gl_FragCoord */
+ /* XXX with a new INTERP_POSITION token, we could just add a
+ * new case to the switch below.
+ */
+ setup_fragcoord_coeff(setup);
+ }
+ else {
+#endif
+ /* The interp mode is looked up by vertex slot, while the result
+ * is stored in the coefficient entry of the fragment slot.
+ */
+ uint j;
+ switch (interp[vertSlot]) {
+ case INTERP_CONSTANT:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_LINEAR:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_PERSPECTIVE:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ default:
+ /* invalid interp mode */
+ assert(0);
+ }
+#if USE_INPUT_MAP
+ }
+#endif
}
}
@@ -660,17 +733,18 @@ static void setup_tri( struct draw_stage *stage,
* for a line.
*/
static void
-line_linear_coeff(struct setup_stage *setup, unsigned slot, unsigned i)
+line_linear_coeff(struct setup_stage *setup,
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
{
- const float da = setup->vmax->data[slot][i] - setup->vmin->data[slot][i];
+ /* da is the attribute difference between the two endpoints; it is
+ * spread over x and y via emaj.dx/emaj.dy scaled by oneoverarea.
+ */
+ const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
const float dadx = da * setup->emaj.dx * setup->oneoverarea;
const float dady = da * setup->emaj.dy * setup->oneoverarea;
- setup->coef[slot].dadx[i] = dadx;
- setup->coef[slot].dady[i] = dady;
- setup->coef[slot].a0[i]
- = (setup->vmin->data[slot][i] -
- (dadx * (setup->vmin->data[0][0] - 0.5f) +
- dady * (setup->vmin->data[0][1] - 0.5f)));
+ /* Results go to the caller-supplied coef; a0 steps back from vmin's
+ * value by vmin's position less a 0.5 offset in x and y.
+ */
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
+ coef->a0[i] = (setup->vmin->data[vertSlot][i] -
+ (dadx * (setup->vmin->data[0][0] - 0.5f) +
+ dady * (setup->vmin->data[0][1] - 0.5f)));
}
@@ -679,21 +753,21 @@ line_linear_coeff(struct setup_stage *setup, unsigned slot, unsigned i)
* for a line.
*/
static void
-line_persp_coeff(struct setup_stage *setup, unsigned slot, unsigned i)
+line_persp_coeff(struct setup_stage *setup,
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
{
/* XXX double-check/verify this arithmetic */
+ /* setup: supplies vmin/vmax, emaj and oneoverarea for the line.
+ * coef: destination coefficients; channel i is written.
+ * vertSlot: vertex attribute slot to read.
+ * i: channel index, 0..3.
+ *
+ * Each endpoint is premultiplied by its OWN data[0][3], the same factor
+ * tri_persp_coeff applies per vertex. Scaling vmax by vmin's factor
+ * gave a wrong gradient whenever the endpoints differ in W.
+ */
- const float a0 = setup->vmin->data[slot][i] * setup->vmin->data[0][3];
- const float a1 = setup->vmax->data[slot][i] * setup->vmin->data[0][3];
+ const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3];
+ const float a1 = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3];
const float da = a1 - a0;
const float dadx = da * setup->emaj.dx * setup->oneoverarea;
const float dady = da * setup->emaj.dy * setup->oneoverarea;
- setup->coef[slot].dadx[i] = dadx;
- setup->coef[slot].dady[i] = dady;
- setup->coef[slot].a0[i]
- = (setup->vmin->data[slot][i] -
- (dadx * (setup->vmin->data[0][0] - 0.5f) +
- dady * (setup->vmin->data[0][1] - 0.5f)));
-
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
+ /* Anchor on the premultiplied vmin value (a0), not the raw attribute,
+ * so the constant term is in the same units as dadx/dady.
+ */
+ coef->a0[i] = (a0 -
+ (dadx * (setup->vmin->data[0][0] - 0.5f) +
+ dady * (setup->vmin->data[0][1] - 0.5f)));
}
@@ -705,7 +779,8 @@ static INLINE void
setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim)
{
const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode;
- unsigned slot, j;
+ const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
+ unsigned fragSlot;
/* use setup->vmin, vmax to point to vertices */
setup->vprovoke = prim->v[1];
@@ -720,31 +795,39 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim)
/* z and w are done by linear interpolation:
*/
- line_linear_coeff(setup, 0, 2);
- line_linear_coeff(setup, 0, 3);
+ line_linear_coeff(setup, &setup->posCoef, 0, 2);
+ line_linear_coeff(setup, &setup->posCoef, 0, 3);
/* setup interpolation for all the remaining attributes:
*/
- for (slot = 1; slot < setup->quad.nr_attrs; slot++) {
- switch (interp[slot]) {
- case INTERP_CONSTANT:
- for (j = 0; j < NUM_CHANNELS; j++)
- const_coeff(setup, slot, j);
- break;
-
- case INTERP_LINEAR:
- for (j = 0; j < NUM_CHANNELS; j++)
- line_linear_coeff(setup, slot, j);
- break;
-
- case INTERP_PERSPECTIVE:
- for (j = 0; j < NUM_CHANNELS; j++)
- line_persp_coeff(setup, slot, j);
- break;
-
- default:
- /* invalid interp mode */
- assert(0);
+ for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) {
+ /* which vertex output maps to this fragment input: */
+ uint vertSlot = fs->input_map[fragSlot];
+
+ if (vertSlot == 0) {
+ /* special case: shader is reading gl_FragCoord */
+ setup_fragcoord_coeff(setup);
+ }
+ else {
+ uint j;
+ switch (interp[vertSlot]) {
+ case INTERP_CONSTANT:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_LINEAR:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_PERSPECTIVE:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+
+ default:
+ /* invalid interp mode */
+ assert(0);
+ }
}
}
}
@@ -910,14 +993,15 @@ setup_line(struct draw_stage *stage, struct prim_header *prim)
static void
-point_persp_coeff(struct setup_stage *setup, const struct vertex_header *vert,
- uint slot, uint i)
+point_persp_coeff(struct setup_stage *setup,
+ const struct vertex_header *vert,
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
{
+ /* Zero gradients for channel i of the caller-supplied coef; the constant
+ * term is the vertex attribute multiplied by vert->data[0][3].
+ * The setup argument is not read in this body.
+ */
- assert(slot < PIPE_MAX_SHADER_INPUTS);
assert(i <= 3);
- setup->coef[slot].dadx[i] = 0.0F;
- setup->coef[slot].dady[i] = 0.0F;
- setup->coef[slot].a0[i] = vert->data[slot][i] * vert->data[0][3];
+ coef->dadx[i] = 0.0F;
+ coef->dady[i] = 0.0F;
+ coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3];
}
@@ -930,6 +1014,7 @@ static void
setup_point(struct draw_stage *stage, struct prim_header *prim)
{
struct setup_stage *setup = setup_stage( stage );
+ const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode;
const struct vertex_header *v0 = prim->v[0];
const int sizeAttr = setup->softpipe->psize_slot;
@@ -940,7 +1025,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim)
const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
const float x = v0->data[0][0]; /* Note: data[0] is always position */
const float y = v0->data[0][1];
- unsigned slot, j;
+ uint fragSlot;
/* For points, all interpolants are constant-valued.
* However, for point sprites, we'll need to setup texcoords appropriately.
@@ -959,22 +1044,36 @@ setup_point(struct draw_stage *stage, struct prim_header *prim)
* probably should be ruled out on that basis.
*/
setup->vprovoke = prim->v[0];
- const_coeff(setup, 0, 2);
- const_coeff(setup, 0, 3);
- for (slot = 1; slot < setup->quad.nr_attrs; slot++) {
- switch (interp[slot]) {
- case INTERP_CONSTANT:
- /* fall-through */
- case INTERP_LINEAR:
- for (j = 0; j < NUM_CHANNELS; j++)
- const_coeff(setup, slot, j);
- break;
- case INTERP_PERSPECTIVE:
- for (j = 0; j < NUM_CHANNELS; j++)
- point_persp_coeff(setup, v0, slot, j);
- break;
- default:
- assert(0);
+
+ /* setup Z, W */
+ const_coeff(setup, &setup->posCoef, 0, 2);
+ const_coeff(setup, &setup->posCoef, 0, 3);
+
+ for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) {
+ /* which vertex output maps to this fragment input: */
+ uint vertSlot = fs->input_map[fragSlot];
+
+ if (vertSlot == 0) {
+ /* special case: shader is reading gl_FragCoord */
+ setup_fragcoord_coeff(setup);
+ }
+ else {
+ uint j;
+ switch (interp[vertSlot]) {
+ case INTERP_CONSTANT:
+ /* fall-through */
+ case INTERP_LINEAR:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_PERSPECTIVE:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ point_persp_coeff(setup, setup->vprovoke,
+ &setup->coef[fragSlot], vertSlot, j);
+ break;
+ default:
+ assert(0);
+ }
}
}
@@ -1108,9 +1207,12 @@ static void setup_begin( struct draw_stage *stage )
{
struct setup_stage *setup = setup_stage(stage);
struct softpipe_context *sp = setup->softpipe;
+ const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
setup->quad.nr_attrs = setup->softpipe->nr_frag_attrs;
+ setup->firstFpInput = fs->input_semantic_name[0];
+
sp->quad.first->begin(sp->quad.first);
}
@@ -1151,6 +1253,7 @@ struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe )
setup->stage.destroy = render_destroy;
setup->quad.coef = setup->coef;
+ setup->quad.posCoef = &setup->posCoef;
return &setup->stage;
}
diff --git a/src/mesa/pipe/softpipe/sp_quad_earlyz.c b/src/mesa/pipe/softpipe/sp_quad_earlyz.c
index 3abd1f1fb9..22ea99049f 100644
--- a/src/mesa/pipe/softpipe/sp_quad_earlyz.c
+++ b/src/mesa/pipe/softpipe/sp_quad_earlyz.c
@@ -47,9 +47,9 @@ earlyz_quad(
{
const float fx = (float) quad->x0;
const float fy = (float) quad->y0;
- const float dzdx = quad->coef[0].dadx[2];
- const float dzdy = quad->coef[0].dady[2];
- const float z0 = quad->coef[0].a0[2] + dzdx * fx + dzdy * fy;
+ const float dzdx = quad->posCoef->dadx[2];
+ const float dzdy = quad->posCoef->dady[2];
+ const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
quad->outputs.depth[0] = z0;
quad->outputs.depth[1] = z0 + dzdx;
diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c
index 251b47341a..6e7e7eb074 100644
--- a/src/mesa/pipe/softpipe/sp_quad_fs.c
+++ b/src/mesa/pipe/softpipe/sp_quad_fs.c
@@ -74,15 +74,49 @@ quad_shade_stage(struct quad_stage *qs)
}
+/**
+ * Compute quad X,Y,Z,W for the four fragments in a quad.
+ * Note that we only need to "compute" X and Y for the upper-left fragment.
+ * We could do less work if we're not depth testing, or if there are no
+ * perspective-corrected attributes, but that's seldom the case.
+ */
+static void
+setup_pos_vector(const struct tgsi_interp_coef *coef,
+ float x, float y,
+ struct tgsi_exec_vector *quadpos)
+{
+ uint chan;
+ /* do X */
+ /* Only lane f[0] of X and Y is written; f[1..3] are left untouched. */
+ quadpos->xyzw[0].f[0] = x;
+ /* do Y */
+ quadpos->xyzw[1].f[0] = y;
+ /* do Z and W for all fragments in the quad */
+ for (chan = 2; chan < 4; chan++) {
+ const float dadx = coef->dadx[chan];
+ const float dady = coef->dady[chan];
+ const float a0 = coef->a0[chan] + dadx * x + dady * y;
+ /* Lane order: f[0] at (x,y), f[1] one step in x, f[2] one step
+ * in y, f[3] one step in both.
+ */
+ quadpos->xyzw[chan].f[0] = a0;
+ quadpos->xyzw[chan].f[1] = a0 + dadx;
+ quadpos->xyzw[chan].f[2] = a0 + dady;
+ quadpos->xyzw[chan].f[3] = a0 + dadx + dady;
+ }
+}
+
+
+/* Function-pointer type taking input, output, constant, temporary and
+ * interpolation-coefficient arguments. The extra quadPos parameter is
+ * wrapped in #if 0, so it is not part of the signature at present.
+ */
typedef void (XSTDCALL *codegen_function)(
const struct tgsi_exec_vector *input,
struct tgsi_exec_vector *output,
float (*constant)[4],
struct tgsi_exec_vector *temporary,
- const struct tgsi_interp_coef *coef );
+ const struct tgsi_interp_coef *coef
+#if 0
+ ,const struct tgsi_exec_vector *quadPos
+#endif
+ );
+
-/* This should be done by the fragment shader execution unit (code
- * generated from the decl instructions). Do it here for now.
+/**
+ * Execute fragment shader for the four fragments in the quad.
*/
static void
shade_quad(
@@ -91,28 +125,15 @@ shade_quad(
{
struct quad_shade_stage *qss = quad_shade_stage( qs );
struct softpipe_context *softpipe = qs->softpipe;
- const float fx = (float) quad->x0;
- const float fy = (float) quad->y0;
struct tgsi_exec_machine *machine = &qss->machine;
- /* Consts does not require 16 byte alignment. */
+ /* Consts do not require 16 byte alignment. */
machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
machine->InterpCoefs = quad->coef;
- machine->Inputs[0].xyzw[0].f[0] = fx;
- machine->Inputs[0].xyzw[0].f[1] = fx + 1.0f;
- machine->Inputs[0].xyzw[0].f[2] = fx;
- machine->Inputs[0].xyzw[0].f[3] = fx + 1.0f;
-
- /* XXX for OpenGL we need to invert the Y pos here (y=0=top).
- * but that'll mess up linear/perspective interpolation of other
- * attributes...
- */
- machine->Inputs[0].xyzw[1].f[0] = fy;
- machine->Inputs[0].xyzw[1].f[1] = fy;
- machine->Inputs[0].xyzw[1].f[2] = fy + 1.0f;
- machine->Inputs[0].xyzw[1].f[3] = fy + 1.0f;
+ /* Compute X, Y, Z, W vals for this quad */
+ setup_pos_vector(quad->posCoef, quad->x0, quad->y0, &machine->QuadPos);
/* run shader */
#if defined(__i386__) || defined(__386__)
@@ -123,7 +144,11 @@ shade_quad(
machine->Outputs,
machine->Consts,
machine->Temps,
- machine->InterpCoefs );
+ machine->InterpCoefs
+#if 0
+ ,machine->QuadPos
+#endif
+ );
quad->mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
}
else
diff --git a/src/mesa/pipe/softpipe/sp_quad_stipple.c b/src/mesa/pipe/softpipe/sp_quad_stipple.c
index 04d95989c4..0c42963dfe 100644
--- a/src/mesa/pipe/softpipe/sp_quad_stipple.c
+++ b/src/mesa/pipe/softpipe/sp_quad_stipple.c
@@ -22,10 +22,18 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad)
if (quad->prim == PRIM_TRI) {
struct softpipe_context *softpipe = qs->softpipe;
/* need to invert Y to index into OpenGL's stipple pattern */
- const int y0 = softpipe->framebuffer.cbufs[0]->height - 1 - quad->y0;
- const int y1 = y0 - 1;
- const unsigned stipple0 = softpipe->poly_stipple.stipple[y0 % 32];
- const unsigned stipple1 = softpipe->poly_stipple.stipple[y1 % 32];
+ int y0, y1;
+ uint stipple0, stipple1;
+ if (softpipe->rasterizer->origin_lower_left) {
+ y0 = softpipe->framebuffer.cbufs[0]->height - 1 - quad->y0;
+ y1 = y0 - 1;
+ }
+ else {
+ y0 = quad->y0;
+ y1 = y0 + 1;
+ }
+ stipple0 = softpipe->poly_stipple.stipple[y0 % 32];
+ stipple1 = softpipe->poly_stipple.stipple[y1 % 32];
#if 1
const int col0 = quad->x0 % 32;
diff --git a/src/mesa/pipe/softpipe/sp_state_derived.c b/src/mesa/pipe/softpipe/sp_state_derived.c
index c4f1a0a01a..736ac1c33b 100644
--- a/src/mesa/pipe/softpipe/sp_state_derived.c
+++ b/src/mesa/pipe/softpipe/sp_state_derived.c
@@ -51,18 +51,11 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe )
memset(vinfo, 0, sizeof(*vinfo));
- if (softpipe->depth_stencil->depth.enabled)
- softpipe->need_z = TRUE;
- else
- softpipe->need_z = FALSE;
- softpipe->need_w = FALSE;
if (fs->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
/* Need Z if depth test is enabled or the fragment program uses the
* fragment position (XYZW).
*/
- softpipe->need_z = TRUE;
- softpipe->need_w = TRUE;
}
softpipe->psize_slot = -1;
@@ -121,7 +114,6 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe )
case TGSI_SEMANTIC_GENERIC:
/* this includes texcoords and varying vars */
draw_emit_vertex_attr(vinfo, FORMAT_4F, INTERP_PERSPECTIVE);
- softpipe->need_w = TRUE;
break;
default:
@@ -129,7 +121,11 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe )
}
}
+#if 00
softpipe->nr_frag_attrs = vinfo->num_attribs;
+#else
+ softpipe->nr_frag_attrs = fs->num_inputs;
+#endif
/* We want these after all other attribs since they won't get passed
* to the fragment shader. All prior vertex output attribs should match
diff --git a/src/mesa/pipe/softpipe/sp_texture.c b/src/mesa/pipe/softpipe/sp_texture.c
index 2dd1add6f7..44512e4281 100644
--- a/src/mesa/pipe/softpipe/sp_texture.c
+++ b/src/mesa/pipe/softpipe/sp_texture.c
@@ -52,10 +52,6 @@ static unsigned minify( unsigned d )
return MAX2(1, d>>1);
}
-static int align(int value, int alignment)
-{
- return (value + alignment - 1) & ~(alignment - 1);
-}
static void
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
index 8636271a34..1f43f3643e 100644
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
@@ -1352,8 +1352,8 @@ linear_interpolation(
unsigned attrib,
unsigned chan )
{
- const float x = mach->Inputs[0].xyzw[0].f[0];
- const float y = mach->Inputs[0].xyzw[1].f[0];
+ const float x = mach->QuadPos.xyzw[0].f[0];
+ const float y = mach->QuadPos.xyzw[1].f[0];
const float dadx = mach->InterpCoefs[attrib].dadx[chan];
const float dady = mach->InterpCoefs[attrib].dady[chan];
const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
@@ -1369,15 +1369,17 @@ perspective_interpolation(
unsigned attrib,
unsigned chan )
{
- const float x = mach->Inputs[0].xyzw[0].f[0];
- const float y = mach->Inputs[0].xyzw[1].f[0];
+ const float x = mach->QuadPos.xyzw[0].f[0];
+ const float y = mach->QuadPos.xyzw[1].f[0];
const float dadx = mach->InterpCoefs[attrib].dadx[chan];
const float dady = mach->InterpCoefs[attrib].dady[chan];
const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
- mach->Inputs[attrib].xyzw[chan].f[0] = a0 / mach->Inputs[0].xyzw[3].f[0];
- mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / mach->Inputs[0].xyzw[3].f[1];
- mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / mach->Inputs[0].xyzw[3].f[2];
- mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / mach->Inputs[0].xyzw[3].f[3];
+ const float *w = mach->QuadPos.xyzw[3].f;
+ /* divide by W here */
+ mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
+ mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
+ mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
+ mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
}
@@ -1402,17 +1404,6 @@ exec_declaration(
last = decl->u.DeclarationRange.Last;
mask = decl->Declaration.UsageMask;
- /* Do not touch WPOS.xy */
- if( first == 0 ) {
- mask &= ~TGSI_WRITEMASK_XY;
- if( mask == TGSI_WRITEMASK_NONE ) {
- first++;
- if( first > last ) {
- return;
- }
- }
- }
-
switch( decl->Interpolation.Interpolate ) {
case TGSI_INTERPOLATE_CONSTANT:
interp = constant_interpolation;
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.h b/src/mesa/pipe/tgsi/exec/tgsi_exec.h
index e7952a08e3..db92e282df 100644
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.h
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.h
@@ -177,6 +177,7 @@ struct tgsi_exec_machine
/* FRAGMENT processor only. */
const struct tgsi_interp_coef *InterpCoefs;
+ struct tgsi_exec_vector QuadPos;
/* Conditional execution masks */
uint CondMask; /**< For IF/ELSE/ENDIF */
diff --git a/src/mesa/pipe/xlib/xm_winsys_aub.c b/src/mesa/pipe/xlib/xm_winsys_aub.c
index ee3c2d6181..0348c2ad40 100644
--- a/src/mesa/pipe/xlib/xm_winsys_aub.c
+++ b/src/mesa/pipe/xlib/xm_winsys_aub.c
@@ -493,6 +493,13 @@ static void aub_i965_batch_reloc( struct brw_winsys *sws,
iws->data[iws->nr++] = aub_bo(buf)->offset + delta;
}
+static unsigned aub_i965_get_buffer_offset( struct brw_winsys *sws,
+ struct pipe_buffer_handle *buf,
+ unsigned access_flags )
+{
+ return aub_bo(buf)->offset;
+}
+
static void aub_i965_batch_flush( struct brw_winsys *sws,
@@ -605,6 +612,7 @@ xmesa_create_i965simple( struct pipe_winsys *winsys )
iws->winsys.batch_reloc = aub_i965_batch_reloc;
iws->winsys.batch_flush = aub_i965_batch_flush;
iws->winsys.buffer_subdata_typed = aub_i965_buffer_subdata_typed;
+ iws->winsys.get_buffer_offset = aub_i965_get_buffer_offset;
iws->pipe_winsys = winsys;
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c
index 2a7128dd27..5c6b89d78c 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -77,6 +77,8 @@ static void update_raster_state( struct st_context *st )
uint i;
memset(&raster, 0, sizeof(raster));
+
+ raster.origin_lower_left = 1; /* Always true for OpenGL */
/* _NEW_POLYGON, _NEW_BUFFERS
*/
diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c
index 052b6dd144..6241e70b55 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -35,6 +35,7 @@
#include "st_context.h"
#include "st_cache.h"
#include "st_atom.h"
+#include "st_program.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
@@ -116,17 +117,23 @@ gl_filter_to_img_filter(GLenum filter)
static void
update_samplers(struct st_context *st)
{
- GLuint u;
+ const struct st_fragment_program *fs = st->fp;
+ GLuint su;
- for (u = 0; u < st->ctx->Const.MaxTextureImageUnits; u++) {
- const struct gl_texture_object *texobj
- = st->ctx->Texture.Unit[u]._Current;
+ /* loop over sampler units (aka tex image units) */
+ for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) {
struct pipe_sampler_state sampler;
const struct cso_sampler *cso;
memset(&sampler, 0, sizeof(sampler));
- if (texobj) {
+ if (fs->Base.Base.SamplersUsed & (1 << su)) {
+ GLuint texUnit = fs->Base.Base.SamplerUnits[su];
+ const struct gl_texture_object *texobj
+ = st->ctx->Texture.Unit[texUnit]._Current;
+
+ assert(texobj);
+
sampler.wrap_s = gl_wrap_to_sp(texobj->WrapS);
sampler.wrap_t = gl_wrap_to_sp(texobj->WrapT);
sampler.wrap_r = gl_wrap_to_sp(texobj->WrapR);
@@ -138,7 +145,7 @@ update_samplers(struct st_context *st)
if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB)
sampler.normalized_coords = 1;
- sampler.lod_bias = st->ctx->Texture.Unit[u].LodBias;
+ sampler.lod_bias = st->ctx->Texture.Unit[su].LodBias;
#if 1
sampler.min_lod = texobj->MinLod;
sampler.max_lod = texobj->MaxLod;
@@ -166,10 +173,10 @@ update_samplers(struct st_context *st)
cso = st_cached_sampler_state(st, &sampler);
- if (cso != st->state.sampler[u]) {
+ if (cso != st->state.sampler[su]) {
/* state has changed */
- st->state.sampler[u] = cso;
- st->pipe->bind_sampler_state(st->pipe, u, cso->data);
+ st->state.sampler[su] = cso;
+ st->pipe->bind_sampler_state(st->pipe, su, cso->data);
}
}
}
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index 4ec10badad..33372b0f39 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -151,8 +151,7 @@ find_translated_vp(struct st_context *st,
{
static const GLuint UNUSED = ~0;
struct translated_vertex_program *xvp;
- const GLbitfield fragInputsRead
- = stfp->Base.Base.InputsRead | FRAG_BIT_WPOS;
+ const GLbitfield fragInputsRead = stfp->Base.Base.InputsRead;
/*
* Translate fragment program if needed.
@@ -206,6 +205,7 @@ find_translated_vp(struct st_context *st,
if (xvp->serialNo != stvp->serialNo) {
GLuint outAttr, dummySlot;
const GLbitfield outputsWritten = stvp->Base.Base.OutputsWritten;
+ GLuint numVpOuts = 0;
/* Compute mapping of vertex program outputs to slots, which depends
* on the fragment program's input->slot mapping.
@@ -214,11 +214,24 @@ find_translated_vp(struct st_context *st,
/* set default: */
xvp->output_to_slot[outAttr] = UNUSED;
- if (outputsWritten & (1 << outAttr)) {
+ if (outAttr == VERT_RESULT_HPOS) {
+ /* always put xformed position into slot zero */
+ xvp->output_to_slot[VERT_RESULT_HPOS] = 0;
+ numVpOuts++;
+ }
+ else if (outputsWritten & (1 << outAttr)) {
/* see if the frag prog wants this vert output */
- GLint fpIn = vp_out_to_fp_in(outAttr);
- if (fpIn >= 0) {
- xvp->output_to_slot[outAttr] = stfp->input_to_slot[fpIn];
+ GLint fpInAttrib = vp_out_to_fp_in(outAttr);
+ if (fpInAttrib >= 0) {
+ GLuint fpInSlot = stfp->input_to_slot[fpInAttrib];
+ GLuint vpOutSlot = stfp->fs->state.input_map[fpInSlot];
+ xvp->output_to_slot[outAttr] = vpOutSlot;
+ numVpOuts++;
+ }
+ else if (outAttr == VERT_RESULT_BFC0 ||
+ outAttr == VERT_RESULT_BFC1) {
+ /* backface colors go into last slots */
+ xvp->output_to_slot[outAttr] = numVpOuts++;
}
}
}
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 254740ff20..c40f75417f 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -243,6 +243,10 @@ st_new_renderbuffer_fb(enum pipe_format format)
strb->Base.InternalFormat = GL_DEPTH24_STENCIL8_EXT;
strb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT;
break;
+ case PIPE_FORMAT_S8_UNORM:
+ strb->Base.InternalFormat = GL_STENCIL_INDEX8_EXT;
+ strb->Base._BaseFormat = GL_STENCIL_INDEX;
+ break;
case PIPE_FORMAT_R16G16B16A16_SNORM:
strb->Base.InternalFormat = GL_RGBA16;
strb->Base._BaseFormat = GL_RGBA;
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index b392edf16d..bccabd8004 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -585,6 +585,20 @@ make_temp_decl(
}
+static struct tgsi_full_declaration
+make_sampler_decl(GLuint index)
+{
+ struct tgsi_full_declaration decl;
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.Declaration.Declare = TGSI_DECLARE_RANGE;
+ decl.u.DeclarationRange.First = index;
+ decl.u.DeclarationRange.Last = index;
+ return decl;
+}
+
+
+
/**
* Find the temporaries which are used in the given program.
*/
@@ -675,44 +689,22 @@ tgsi_translate_mesa_program(
if (procType == TGSI_PROCESSOR_FRAGMENT) {
for (i = 0; i < numInputs; i++) {
struct tgsi_full_declaration fulldecl;
- switch (inputSemanticName[i]) {
- case TGSI_SEMANTIC_POSITION:
- /* Fragment XY pos */
- fulldecl = make_input_decl(i,
- GL_TRUE, TGSI_INTERPOLATE_CONSTANT,
- TGSI_WRITEMASK_XY,
- GL_TRUE, TGSI_SEMANTIC_POSITION, 0 );
- ti += tgsi_build_full_declaration(
- &fulldecl,
- &tokens[ti],
- header,
- maxTokens - ti );
- /* Fragment ZW pos */
- fulldecl = make_input_decl(i,
- GL_TRUE, TGSI_INTERPOLATE_LINEAR,
- TGSI_WRITEMASK_ZW,
- GL_TRUE, TGSI_SEMANTIC_POSITION, 0 );
- ti += tgsi_build_full_declaration(&fulldecl,
- &tokens[ti],
- header,
- maxTokens - ti );
- break;
- default:
- fulldecl = make_input_decl(i,
- GL_TRUE, interpMode[i],
- TGSI_WRITEMASK_XYZW,
- GL_TRUE, inputSemanticName[i],
- inputSemanticIndex[i]);
- ti += tgsi_build_full_declaration(&fulldecl,
- &tokens[ti],
- header,
- maxTokens - ti );
- break;
- }
+ fulldecl = make_input_decl(i,
+ GL_TRUE, interpMode[i],
+ TGSI_WRITEMASK_XYZW,
+ GL_TRUE, inputSemanticName[i],
+ inputSemanticIndex[i]);
+ ti += tgsi_build_full_declaration(&fulldecl,
+ &tokens[ti],
+ header,
+ maxTokens - ti );
}
}
else {
/* vertex prog */
+ /* XXX: this could probably be merged with the clause above.
+ * the only difference is the semantic tags.
+ */
for (i = 0; i < numInputs; i++) {
struct tgsi_full_declaration fulldecl;
fulldecl = make_input_decl(i,
@@ -810,6 +802,19 @@ tgsi_translate_mesa_program(
}
}
+ /* texture samplers */
+ for (i = 0; i < 8; i++) {
+ if (program->SamplersUsed & (1 << i)) {
+ struct tgsi_full_declaration fulldecl;
+ fulldecl = make_sampler_decl( i );
+ ti += tgsi_build_full_declaration(&fulldecl,
+ &tokens[ti],
+ header,
+ maxTokens - ti );
+ }
+ }
+
+
for( i = 0; i < program->NumInstructions; i++ ) {
compile_instruction(
&program->Instructions[i],
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index e64bf14d56..fe22233c93 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -47,7 +47,7 @@
#include "st_mesa_to_tgsi.h"
-#define TGSI_DEBUG 0
+#define TGSI_DEBUG 01
/**
@@ -283,16 +283,17 @@ st_translate_fragment_program(struct st_context *st,
const struct cso_fragment_shader *cso;
GLuint interpMode[16]; /* XXX size? */
GLuint attr;
- GLbitfield inputsRead = stfp->Base.Base.InputsRead;
-
- /* For software rendering, we always need the fragment input position
- * in order to calculate interpolated values.
- * For i915, we always want to emit the semantic info for position.
- */
- inputsRead |= FRAG_BIT_WPOS;
+ const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
+ GLuint vslot = 0;
memset(&fs, 0, sizeof(fs));
+ /* which vertex output goes to the first fragment input: */
+ if (inputsRead & FRAG_BIT_WPOS)
+ vslot = 0;
+ else
+ vslot = 1;
+
/*
* Convert Mesa program inputs to TGSI input register semantics.
*/
@@ -300,15 +301,17 @@ st_translate_fragment_program(struct st_context *st,
if (inputsRead & (1 << attr)) {
const GLuint slot = fs.num_inputs;
- fs.num_inputs++;
-
defaultInputMapping[attr] = slot;
+ fs.input_map[slot] = vslot++;
+
+ fs.num_inputs++;
+
switch (attr) {
case FRAG_ATTRIB_WPOS:
fs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
fs.input_semantic_index[slot] = 0;
- interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
+ interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
break;
case FRAG_ATTRIB_COL0:
fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;