summaryrefslogtreecommitdiff
path: root/src/mesa
diff options
context:
space:
mode:
authorBen Skeggs <skeggsb@gmail.com>2007-12-12 13:11:19 +1100
committerBen Skeggs <skeggsb@gmail.com>2007-12-12 13:11:19 +1100
commit58915980127ab4e57b6b40a8c42f44be4a12aeae (patch)
tree176f379bb19fc8caacf96112a1ebeda8aea90a16 /src/mesa
parente282d22d512d2a5871d0fabb7d855a16b4593c50 (diff)
parentb2ad30d57197c2167789e4f3f5b34af6df56dde2 (diff)
Merge branch 'upstream-gallium-0.1' into darktama-gallium-0.1
Conflicts: src/mesa/pipe/Makefile
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/Makefile8
-rw-r--r--src/mesa/drivers/common/driverfuncs.c1
-rw-r--r--src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c72
-rw-r--r--src/mesa/main/dd.h2
-rw-r--r--src/mesa/main/queryobj.c18
-rw-r--r--src/mesa/main/queryobj.h3
-rw-r--r--src/mesa/pipe/Makefile3
-rw-r--r--src/mesa/pipe/cell/common.h23
-rw-r--r--src/mesa/pipe/cell/ppu/cell_context.c6
-rw-r--r--src/mesa/pipe/cell/ppu/cell_context.h3
-rw-r--r--src/mesa/pipe/cell/ppu/cell_flush.c6
-rw-r--r--src/mesa/pipe/cell/ppu/cell_spu.c150
-rw-r--r--src/mesa/pipe/cell/ppu/cell_spu.h41
-rw-r--r--src/mesa/pipe/cell/ppu/cell_state.h3
-rw-r--r--src/mesa/pipe/cell/ppu/cell_state_sampler.c32
-rw-r--r--src/mesa/pipe/cell/ppu/cell_surface.c83
-rw-r--r--src/mesa/pipe/cell/ppu/cell_surface.h16
-rw-r--r--src/mesa/pipe/cell/spu/main.c77
-rw-r--r--src/mesa/pipe/cell/spu/main.h (renamed from src/mesa/state_tracker/st_atom_clear_color.c)68
-rw-r--r--src/mesa/pipe/cell/spu/tri.c873
-rw-r--r--src/mesa/pipe/cell/spu/tri.h50
-rw-r--r--src/mesa/pipe/draw/draw_context.c21
-rw-r--r--src/mesa/pipe/draw/draw_context.h6
-rw-r--r--src/mesa/pipe/draw/draw_feedback.c253
-rw-r--r--src/mesa/pipe/draw/draw_private.h9
-rw-r--r--src/mesa/pipe/draw/draw_validate.c5
-rw-r--r--src/mesa/pipe/draw/draw_vertex_shader_llvm.c11
-rw-r--r--src/mesa/pipe/failover/fo_context.h2
-rw-r--r--src/mesa/pipe/failover/fo_state.c34
-rw-r--r--src/mesa/pipe/failover/fo_state_emit.c5
-rw-r--r--src/mesa/pipe/i915simple/i915_context.c39
-rw-r--r--src/mesa/pipe/i915simple/i915_context.h6
-rw-r--r--src/mesa/pipe/i915simple/i915_state.c60
-rw-r--r--src/mesa/pipe/i915simple/i915_surface.c3
-rw-r--r--src/mesa/pipe/i965simple/Makefile75
-rw-r--r--src/mesa/pipe/i965simple/brw_batch.h57
-rw-r--r--src/mesa/pipe/i965simple/brw_blit.c217
-rw-r--r--src/mesa/pipe/i965simple/brw_blit.h32
-rw-r--r--src/mesa/pipe/i965simple/brw_cc.c268
-rw-r--r--src/mesa/pipe/i965simple/brw_clip.c201
-rw-r--r--src/mesa/pipe/i965simple/brw_clip.h170
-rw-r--r--src/mesa/pipe/i965simple/brw_clip_line.c245
-rw-r--r--src/mesa/pipe/i965simple/brw_clip_point.c47
-rw-r--r--src/mesa/pipe/i965simple/brw_clip_state.c92
-rw-r--r--src/mesa/pipe/i965simple/brw_clip_tri.c566
-rw-r--r--src/mesa/pipe/i965simple/brw_clip_unfilled.c477
-rw-r--r--src/mesa/pipe/i965simple/brw_clip_util.c351
-rw-r--r--src/mesa/pipe/i965simple/brw_context.c244
-rw-r--r--src/mesa/pipe/i965simple/brw_context.h698
-rw-r--r--src/mesa/pipe/i965simple/brw_curbe.c369
-rw-r--r--src/mesa/pipe/i965simple/brw_defines.h852
-rw-r--r--src/mesa/pipe/i965simple/brw_draw.c235
-rw-r--r--src/mesa/pipe/i965simple/brw_draw.h55
-rw-r--r--src/mesa/pipe/i965simple/brw_draw_upload.c306
-rw-r--r--src/mesa/pipe/i965simple/brw_eu.c130
-rw-r--r--src/mesa/pipe/i965simple/brw_eu.h877
-rw-r--r--src/mesa/pipe/i965simple/brw_eu_debug.c88
-rw-r--r--src/mesa/pipe/i965simple/brw_eu_emit.c1080
-rw-r--r--src/mesa/pipe/i965simple/brw_eu_util.c126
-rw-r--r--src/mesa/pipe/i965simple/brw_flush.c84
-rw-r--r--src/mesa/pipe/i965simple/brw_gs.c196
-rw-r--r--src/mesa/pipe/i965simple/brw_gs.h75
-rw-r--r--src/mesa/pipe/i965simple/brw_gs_emit.c148
-rw-r--r--src/mesa/pipe/i965simple/brw_gs_state.c88
-rw-r--r--src/mesa/pipe/i965simple/brw_metaops.c538
-rw-r--r--src/mesa/pipe/i965simple/brw_misc_state.c425
-rw-r--r--src/mesa/pipe/i965simple/brw_program.c130
-rw-r--r--src/mesa/pipe/i965simple/brw_reg.h76
-rw-r--r--src/mesa/pipe/i965simple/brw_regions.c (renamed from src/mesa/pipe/softpipe/sp_state_feedback.c)55
-rw-r--r--src/mesa/pipe/i965simple/brw_regions.h5
-rw-r--r--src/mesa/pipe/i965simple/brw_sf.c194
-rw-r--r--src/mesa/pipe/i965simple/brw_sf.h111
-rw-r--r--src/mesa/pipe/i965simple/brw_sf_emit.c696
-rw-r--r--src/mesa/pipe/i965simple/brw_sf_state.c236
-rw-r--r--src/mesa/pipe/i965simple/brw_state.c451
-rw-r--r--src/mesa/pipe/i965simple/brw_state.h157
-rw-r--r--src/mesa/pipe/i965simple/brw_state_batch.c113
-rw-r--r--src/mesa/pipe/i965simple/brw_state_cache.c442
-rw-r--r--src/mesa/pipe/i965simple/brw_state_pool.c139
-rw-r--r--src/mesa/pipe/i965simple/brw_state_upload.c231
-rw-r--r--src/mesa/pipe/i965simple/brw_strings.c72
-rw-r--r--src/mesa/pipe/i965simple/brw_structs.h1348
-rw-r--r--src/mesa/pipe/i965simple/brw_surface.c397
-rw-r--r--src/mesa/pipe/i965simple/brw_tex.c65
-rw-r--r--src/mesa/pipe/i965simple/brw_tex_layout.c359
-rw-r--r--src/mesa/pipe/i965simple/brw_tex_layout.h15
-rw-r--r--src/mesa/pipe/i965simple/brw_urb.c197
-rw-r--r--src/mesa/pipe/i965simple/brw_util.c104
-rw-r--r--src/mesa/pipe/i965simple/brw_util.h43
-rw-r--r--src/mesa/pipe/i965simple/brw_vs.c130
-rw-r--r--src/mesa/pipe/i965simple/brw_vs.h82
-rw-r--r--src/mesa/pipe/i965simple/brw_vs_constval.c223
-rw-r--r--src/mesa/pipe/i965simple/brw_vs_emit.c1243
-rw-r--r--src/mesa/pipe/i965simple/brw_vs_state.c102
-rw-r--r--src/mesa/pipe/i965simple/brw_vtbl.c149
-rw-r--r--src/mesa/pipe/i965simple/brw_winsys.h192
-rw-r--r--src/mesa/pipe/i965simple/brw_wm.c338
-rw-r--r--src/mesa/pipe/i965simple/brw_wm.h274
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_debug.c172
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_emit.c1289
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_fp.c1007
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_glsl.c1356
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_iz.c214
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_pass0.c466
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_pass1.c277
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_pass2.c335
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_sampler_state.c275
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_state.c202
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_surface_state.c294
-rw-r--r--src/mesa/pipe/llvm/gallivm.cpp57
-rw-r--r--src/mesa/pipe/llvm/gallivm.h3
-rw-r--r--src/mesa/pipe/llvm/gallivm_builtins.cpp361
-rw-r--r--src/mesa/pipe/llvm/instructions.cpp30
-rw-r--r--src/mesa/pipe/llvm/instructions.h4
-rw-r--r--src/mesa/pipe/llvm/llvm_base_shader.cpp1359
-rw-r--r--src/mesa/pipe/llvm/llvm_builtins.c9
-rw-r--r--src/mesa/pipe/llvm/llvm_entry.c64
-rw-r--r--src/mesa/pipe/llvm/storage.cpp206
-rw-r--r--src/mesa/pipe/llvm/storage.h37
-rw-r--r--src/mesa/pipe/p_compiler.h2
-rw-r--r--src/mesa/pipe/p_context.h44
-rw-r--r--src/mesa/pipe/p_format.h47
-rw-r--r--src/mesa/pipe/p_state.h44
-rw-r--r--src/mesa/pipe/p_util.h77
-rw-r--r--src/mesa/pipe/p_winsys.h18
-rw-r--r--src/mesa/pipe/softpipe/Makefile2
-rw-r--r--src/mesa/pipe/softpipe/sp_context.c42
-rw-r--r--src/mesa/pipe/softpipe/sp_context.h12
-rw-r--r--src/mesa/pipe/softpipe/sp_draw_arrays.c22
-rw-r--r--src/mesa/pipe/softpipe/sp_prim_setup.c35
-rw-r--r--src/mesa/pipe/softpipe/sp_quad_fs.c10
-rw-r--r--src/mesa/pipe/softpipe/sp_quad_occlusion.c16
-rw-r--r--src/mesa/pipe/softpipe/sp_query.c107
-rw-r--r--src/mesa/pipe/softpipe/sp_query.h39
-rw-r--r--src/mesa/pipe/softpipe/sp_state.h16
-rw-r--r--src/mesa/pipe/softpipe/sp_state_sampler.c17
-rw-r--r--src/mesa/pipe/softpipe/sp_state_surface.c8
-rw-r--r--src/mesa/pipe/softpipe/sp_surface.c63
-rw-r--r--src/mesa/pipe/tgsi/exec/tgsi_exec.c3
-rw-r--r--src/mesa/pipe/tgsi/exec/tgsi_exec.h1
-rwxr-xr-xsrc/mesa/pipe/tgsi/exec/tgsi_sse2.c10
-rw-r--r--src/mesa/pipe/xlib/brw_aub.c392
-rw-r--r--src/mesa/pipe/xlib/brw_aub.h114
-rw-r--r--src/mesa/pipe/xlib/xm_api.c18
-rw-r--r--src/mesa/pipe/xlib/xm_winsys.c100
-rw-r--r--src/mesa/pipe/xlib/xm_winsys_aub.c618
-rw-r--r--src/mesa/pipe/xlib/xm_winsys_aub.h67
-rw-r--r--src/mesa/pipe/xlib/xmesaP.h6
-rw-r--r--src/mesa/sources6
-rw-r--r--src/mesa/state_tracker/st_atom.c1
-rw-r--r--src/mesa/state_tracker/st_atom.h1
-rw-r--r--src/mesa/state_tracker/st_atom_depth.c2
-rw-r--r--src/mesa/state_tracker/st_atom_sampler.c15
-rw-r--r--src/mesa/state_tracker/st_atom_texture.c32
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c19
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.c35
-rw-r--r--src/mesa/state_tracker/st_cb_queryobj.c68
-rw-r--r--src/mesa/state_tracker/st_cb_rasterpos.c44
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c40
-rw-r--r--src/mesa/state_tracker/st_cb_texture.h3
-rw-r--r--src/mesa/state_tracker/st_context.h4
-rw-r--r--src/mesa/state_tracker/st_draw.c1
-rw-r--r--src/mesa/state_tracker/st_format.c4
-rw-r--r--src/mesa/state_tracker/st_texture.c6
-rw-r--r--src/mesa/state_tracker/st_texture.h1
165 files changed, 27557 insertions, 2465 deletions
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index 6f090d9b3d..dd8e2c3762 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -11,7 +11,9 @@ GL_MINOR = 5
GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY)
-SOFTPIPE_LIB = $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a
+PIPE_LIB = \
+ $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \
+ $(TOP)/src/mesa/pipe/i965simple/libi965simple.a
ifeq ($(CONFIG_NAME), linux-cell)
CELL_LIB = $(TOP)/src/mesa/pipe/cell/ppu/libcell.a
@@ -118,12 +120,12 @@ stand-alone: depend subdirs $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) $(TOP)/$(LIB_DIR)/$
osmesa-only: depend subdirs $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME)
# Make the GL library
-$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(SOFTPIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU)
+$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU)
@ $(TOP)/bin/mklib -o $(GL_LIB) \
-major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \
-install $(TOP)/$(LIB_DIR) \
$(MKLIB_OPTIONS) $(STAND_ALONE_OBJECTS) \
- $(SOFTPIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(GL_LIB_DEPS)
+ $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(GL_LIB_DEPS)
# Make the OSMesa library
$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OSMESA_DRIVER_OBJECTS) $(OSMESA16_OBJECTS)
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index ea0cc51d4a..33caf7dae1 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -224,6 +224,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
/* query objects */
driver->NewQueryObject = _mesa_new_query_object;
+ driver->DeleteQuery = _mesa_delete_query;
driver->BeginQuery = _mesa_begin_query;
driver->EndQuery = _mesa_end_query;
driver->WaitQuery = _mesa_wait_query;
diff --git a/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c b/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c
index 9c643dd0ba..c7b519d95b 100644
--- a/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c
+++ b/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c
@@ -178,32 +178,11 @@ intel_flush_frontbuffer( struct pipe_winsys *winsys,
}
-static unsigned
-intel_i915_surface_pitch(struct pipe_winsys *winsys,
- unsigned cpp, unsigned width, unsigned flags)
-{
- /* Choose a pitch to match hardware requirements - requires 64 byte
- * alignment of render targets.
- *
- * XXX: is this ok for textures??
- * clearly want to be able to render to textures under some
- * circumstances, but maybe not always a requirement.
- */
-
- /* XXX is the pitch different for textures vs. drawables? */
- if (1/*flags & PIPE_SURFACE_FLAG_TEXTURE*/) /* or PIPE_SURFACE_FLAG_RENDER? */
- return ((cpp * width + 63) & ~63) / cpp;
- else
- return ((cpp * width + 63) & ~63) / cpp;
-}
-
-
static struct pipe_surface *
-intel_i915_surface_alloc(struct pipe_winsys *winsys, enum pipe_format format)
+intel_i915_surface_alloc(struct pipe_winsys *winsys)
{
struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface);
if (surf) {
- surf->format = format;
surf->refcount = 1;
surf->winsys = winsys;
}
@@ -211,6 +190,53 @@ intel_i915_surface_alloc(struct pipe_winsys *winsys, enum pipe_format format)
}
+/**
+ * Round n up to next multiple.
+ */
+static INLINE unsigned
+round_up(unsigned n, unsigned multiple)
+{
+ return (n + multiple - 1) & ~(multiple - 1);
+}
+
+/**
+ * Copied from xm_winsys.c
+ */
+static int
+intel_i915_surface_alloc_storage(struct pipe_winsys *winsys,
+ struct pipe_surface *surf,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned flags)
+{
+ const unsigned alignment = 64;
+ int ret;
+
+ surf->width = width;
+ surf->height = height;
+ surf->format = format;
+ surf->cpp = pf_get_size(format);
+ surf->pitch = round_up(width, alignment / surf->cpp);
+
+ assert(!surf->buffer);
+ surf->buffer = winsys->buffer_create(winsys, alignment, 0, 0);
+ if(!surf->buffer)
+ return -1;
+
+ ret = winsys->buffer_data(winsys,
+ surf->buffer,
+ surf->pitch * surf->cpp * height,
+ NULL,
+ 0);
+ if(ret) {
+ winsys->buffer_reference(winsys, &surf->buffer, NULL);
+ return ret;
+ }
+
+ return 0;
+}
+
+
static void
intel_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
{
@@ -265,8 +291,8 @@ intel_create_pipe_winsys( int fd )
iws->winsys.flush_frontbuffer = intel_flush_frontbuffer;
iws->winsys.printf = intel_printf;
iws->winsys.get_name = intel_get_name;
- iws->winsys.surface_pitch = intel_i915_surface_pitch;
iws->winsys.surface_alloc = intel_i915_surface_alloc;
+ iws->winsys.surface_alloc_storage = intel_i915_surface_alloc_storage;
iws->winsys.surface_release = intel_i915_surface_release;
if (fd)
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index f089fcb48f..3bec3bd433 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -811,8 +811,10 @@ struct dd_function_table {
*/
/*@{*/
struct gl_query_object * (*NewQueryObject)(GLcontext *ctx, GLuint id);
+ void (*DeleteQuery)(GLcontext *ctx, struct gl_query_object *q);
void (*BeginQuery)(GLcontext *ctx, struct gl_query_object *q);
void (*EndQuery)(GLcontext *ctx, struct gl_query_object *q);
+ void (*CheckQuery)(GLcontext *ctx, struct gl_query_object *q);
void (*WaitQuery)(GLcontext *ctx, struct gl_query_object *q);
/*@}*/
diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index 688d0fc7bc..e30f5480da 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -94,8 +94,8 @@ _mesa_wait_query(GLcontext *ctx, struct gl_query_object *q)
* Not removed from hash table here.
* XXX maybe add Delete() method to gl_query_object class and call that instead
*/
-static void
-delete_query_object(struct gl_query_object *q)
+void
+_mesa_delete_query(struct gl_query_object *q)
{
_mesa_free(q);
}
@@ -171,7 +171,7 @@ _mesa_DeleteQueriesARB(GLsizei n, const GLuint *ids)
if (q) {
ASSERT(!q->Active); /* should be caught earlier */
_mesa_HashRemove(ctx->Query.QueryObjects, ids[i]);
- delete_query_object(q);
+ ctx->Driver.DeleteQuery(ctx, q);
}
}
}
@@ -386,6 +386,8 @@ _mesa_GetQueryObjectivARB(GLuint id, GLenum pname, GLint *params)
}
break;
case GL_QUERY_RESULT_AVAILABLE_ARB:
+ if (!q->Ready)
+ ctx->Driver.CheckQuery( ctx, q );
*params = q->Ready;
break;
default:
@@ -424,6 +426,8 @@ _mesa_GetQueryObjectuivARB(GLuint id, GLenum pname, GLuint *params)
}
break;
case GL_QUERY_RESULT_AVAILABLE_ARB:
+ if (!q->Ready)
+ ctx->Driver.CheckQuery( ctx, q );
*params = q->Ready;
break;
default:
@@ -461,6 +465,8 @@ _mesa_GetQueryObjecti64vEXT(GLuint id, GLenum pname, GLint64EXT *params)
*params = q->Result;
break;
case GL_QUERY_RESULT_AVAILABLE_ARB:
+ if (!q->Ready)
+ ctx->Driver.CheckQuery( ctx, q );
*params = q->Ready;
break;
default:
@@ -496,6 +502,8 @@ _mesa_GetQueryObjectui64vEXT(GLuint id, GLenum pname, GLuint64EXT *params)
*params = q->Result;
break;
case GL_QUERY_RESULT_AVAILABLE_ARB:
+ if (!q->Ready)
+ ctx->Driver.CheckQuery( ctx, q );
*params = q->Ready;
break;
default:
@@ -527,8 +535,8 @@ static void
delete_queryobj_cb(GLuint id, void *data, void *userData)
{
struct gl_query_object *q= (struct gl_query_object *) data;
- (void) userData;
- delete_query_object(q);
+ GLcontext *ctx = (GLcontext *)userData;
+ ctx->Driver.DeleteQuery(ctx, q);
}
diff --git a/src/mesa/main/queryobj.h b/src/mesa/main/queryobj.h
index d466aae652..c05a1f3da8 100644
--- a/src/mesa/main/queryobj.h
+++ b/src/mesa/main/queryobj.h
@@ -37,6 +37,9 @@ extern void
_mesa_free_query_data(GLcontext *ctx);
extern void
+_mesa_delete_query(struct gl_query_object *q);
+
+extern void
_mesa_begin_query(GLcontext *ctx, struct gl_query_object *q);
extern void
diff --git a/src/mesa/pipe/Makefile b/src/mesa/pipe/Makefile
index e4b72e4940..fbd36d95f7 100644
--- a/src/mesa/pipe/Makefile
+++ b/src/mesa/pipe/Makefile
@@ -6,7 +6,8 @@ ifeq ($(CONFIG_NAME), linux-cell)
CELL_DIR = cell
endif
-SUBDIRS = softpipe i915simple nv40 nv50 failover pipebuffer $(CELL_DIR)
+SUBDIRS = softpipe i915simple i965simple nv40 nv50 failover \
+ pipebuffer $(CELL_DIR)
default: subdirs
diff --git a/src/mesa/pipe/cell/common.h b/src/mesa/pipe/cell/common.h
index da7de78803..f7f1e2eb41 100644
--- a/src/mesa/pipe/cell/common.h
+++ b/src/mesa/pipe/cell/common.h
@@ -33,11 +33,11 @@
#ifndef CELL_COMMON_H
#define CELL_COMMON_H
+#include "pipe/p_compiler.h"
#include "pipe/p_util.h"
-#define ALIGN16 __attribute__( (aligned( 16 )) )
-
+/** for sanity checking */
#define ASSERT_ALIGN16(ptr) \
assert((((unsigned long) (ptr)) & 0xf) == 0);
@@ -49,7 +49,7 @@
#define CELL_CMD_EXIT 1
#define CELL_CMD_FRAMEBUFFER 2
#define CELL_CMD_CLEAR_TILES 3
-#define CELL_CMD_INVERT_TILES 4
+#define CELL_CMD_TRIANGLE 4
#define CELL_CMD_FINISH 5
@@ -61,7 +61,7 @@ struct cell_command_framebuffer
void *start;
int width, height;
unsigned format;
-} ALIGN16;
+} ALIGN16_ATTRIB;
/**
@@ -70,7 +70,14 @@ struct cell_command_framebuffer
struct cell_command_clear_tiles
{
uint value;
-} ALIGN16;
+} ALIGN16_ATTRIB;
+
+
+struct cell_command_triangle
+{
+ float vert[3][4];
+ float color[3][4];
+} ALIGN16_ATTRIB;
/** XXX unions don't seem to work */
@@ -78,15 +85,17 @@ struct cell_command
{
struct cell_command_framebuffer fb;
struct cell_command_clear_tiles clear;
-} ALIGN16;
+ struct cell_command_triangle tri;
+} ALIGN16_ATTRIB;
+/** This is the object passed to spe_create_thread() */
struct cell_init_info
{
unsigned id;
unsigned num_spus;
struct cell_command *cmd;
-} ALIGN16;
+} ALIGN16_ATTRIB;
diff --git a/src/mesa/pipe/cell/ppu/cell_context.c b/src/mesa/pipe/cell/ppu/cell_context.c
index 68543ecf30..966f355bb3 100644
--- a/src/mesa/pipe/cell/ppu/cell_context.c
+++ b/src/mesa/pipe/cell/ppu/cell_context.c
@@ -208,18 +208,12 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws)
cell->pipe.set_blend_color = cell_set_blend_color;
cell->pipe.set_clip_state = cell_set_clip_state;
- cell->pipe.set_clear_color_state = cell_set_clear_color_state;
cell->pipe.set_constant_buffer = cell_set_constant_buffer;
-#if 0
- cell->pipe.set_feedback_state = cell_set_feedback_state;
-#endif
cell->pipe.set_framebuffer_state = cell_set_framebuffer_state;
cell->pipe.set_polygon_stipple = cell_set_polygon_stipple;
- cell->pipe.set_sampler_units = cell_set_sampler_units;
cell->pipe.set_scissor_state = cell_set_scissor_state;
- cell->pipe.set_texture_state = cell_set_texture_state;
cell->pipe.set_viewport_state = cell_set_viewport_state;
cell->pipe.set_vertex_buffer = cell_set_vertex_buffer;
diff --git a/src/mesa/pipe/cell/ppu/cell_context.h b/src/mesa/pipe/cell/ppu/cell_context.h
index a64692081d..96f000eef4 100644
--- a/src/mesa/pipe/cell/ppu/cell_context.h
+++ b/src/mesa/pipe/cell/ppu/cell_context.h
@@ -47,10 +47,8 @@ struct cell_context
const struct pipe_rasterizer_state *rasterizer;
struct pipe_blend_color blend_color;
- struct pipe_clear_color_state clear_color;
struct pipe_clip_state clip;
struct pipe_constant_buffer constants[2];
- struct pipe_feedback_state feedback;
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
@@ -58,7 +56,6 @@ struct cell_context
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX];
- uint sampler_units[PIPE_MAX_SAMPLERS];
uint dirty;
/** The primitive drawing context */
diff --git a/src/mesa/pipe/cell/ppu/cell_flush.c b/src/mesa/pipe/cell/ppu/cell_flush.c
index e844d13f06..b1ff0e51cd 100644
--- a/src/mesa/pipe/cell/ppu/cell_flush.c
+++ b/src/mesa/pipe/cell/ppu/cell_flush.c
@@ -26,8 +26,6 @@
**************************************************************************/
-#include <cbe_mfc.h>
-
#include "cell_context.h"
#include "cell_flush.h"
#include "cell_spu.h"
@@ -43,12 +41,12 @@ cell_flush(struct pipe_context *pipe, unsigned flags)
/* Send CMD_FINISH to all SPUs */
for (i = 0; i < cell->num_spus; i++) {
- send_mbox_message(control_ps_area[i], CELL_CMD_FINISH);
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH);
}
/* Wait for ack */
for (i = 0; i < cell->num_spus; i++) {
- uint k = wait_mbox_message(control_ps_area[i]);
+ uint k = wait_mbox_message(cell_global.spe_contexts[i]);
assert(k == CELL_CMD_FINISH);
}
}
diff --git a/src/mesa/pipe/cell/ppu/cell_spu.c b/src/mesa/pipe/cell/ppu/cell_spu.c
index ed56250ff1..55a0d5038b 100644
--- a/src/mesa/pipe/cell/ppu/cell_spu.c
+++ b/src/mesa/pipe/cell/ppu/cell_spu.c
@@ -26,7 +26,7 @@
**************************************************************************/
-#include <cbe_mfc.h>
+#include <pthread.h>
#include "cell_spu.h"
#include "pipe/p_format.h"
@@ -40,29 +40,16 @@ helpful headers:
*/
-/**
- * SPU/SPE handles, etc
- */
-speid_t speid[MAX_SPUS];
-spe_spu_control_area_t *control_ps_area[MAX_SPUS];
-
-
-/**
- * Data sent to SPUs
- */
-struct cell_init_info inits[MAX_SPUS] ALIGN16;
-struct cell_command command[MAX_SPUS] ALIGN16;
+struct cell_global_info cell_global;
/**
* Write a 1-word message to the given SPE mailbox.
*/
void
-send_mbox_message(spe_spu_control_area_t *ca, unsigned int msg)
+send_mbox_message(spe_context_ptr_t ctx, unsigned int msg)
{
- while (_spe_in_mbox_status(ca) < 1)
- ;
- _spe_in_mbox_write(ca, msg);
+ spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING);
}
@@ -70,13 +57,37 @@ send_mbox_message(spe_spu_control_area_t *ca, unsigned int msg)
* Wait for a 1-word message to arrive in given mailbox.
*/
uint
-wait_mbox_message(spe_spu_control_area_t *ca)
+wait_mbox_message(spe_context_ptr_t ctx)
+{
+ do {
+ unsigned data;
+ int count = spe_out_mbox_read(ctx, &data, 1);
+
+ if (count == 1) {
+ return data;
+ }
+
+ if (count < 0) {
+ /* error */ ;
+ }
+ } while (1);
+}
+
+
+static void *cell_thread_function(void *arg)
{
- uint k;
- while (_spe_out_mbox_status(ca) < 1)
- ;
- k = _spe_out_mbox_read(ca);
- return k;
+ struct cell_init_info *init = (struct cell_init_info *) arg;
+ unsigned entry = SPE_DEFAULT_ENTRY;
+
+ ASSERT_ALIGN16(init);
+
+ if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0,
+ init, NULL, NULL) < 0) {
+ fprintf(stderr, "spe_context_run() failed\n");
+ exit(1);
+ }
+
+ pthread_exit(NULL);
}
@@ -88,30 +99,32 @@ cell_start_spus(uint num_spus)
{
uint i;
- assert((sizeof(struct cell_command) & 0xf) == 0);
- ASSERT_ALIGN16(&command[0]);
- ASSERT_ALIGN16(&command[1]);
+ assert(num_spus <= MAX_SPUS);
- assert((sizeof(struct cell_init_info) & 0xf) == 0);
- ASSERT_ALIGN16(&inits[0]);
- ASSERT_ALIGN16(&inits[1]);
+ ASSERT_ALIGN16(&cell_global.command[0]);
+ ASSERT_ALIGN16(&cell_global.command[1]);
- /* XXX do we need to create a gid with spe_create_group()? */
+ ASSERT_ALIGN16(&cell_global.inits[0]);
+ ASSERT_ALIGN16(&cell_global.inits[1]);
for (i = 0; i < num_spus; i++) {
- inits[i].id = i;
- inits[i].num_spus = num_spus;
- inits[i].cmd = &command[i];
-
- speid[i] = spe_create_thread(0, /* gid */
- &g3d_spu, /* spe program handle */
- &inits[i], /* argp */
- NULL, /* envp */
- -1, /* mask */
- SPE_MAP_PS/*0*/ ); /* flags */
-
- control_ps_area[i] = spe_get_ps_area(speid[i], SPE_CONTROL_AREA);
- assert(control_ps_area[i]);
+ cell_global.inits[i].id = i;
+ cell_global.inits[i].num_spus = num_spus;
+ cell_global.inits[i].cmd = &cell_global.command[i];
+
+ cell_global.spe_contexts[i] = spe_context_create(0, NULL);
+ if (!cell_global.spe_contexts[i]) {
+ fprintf(stderr, "spe_context_create() failed\n");
+ exit(1);
+ }
+
+ if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) {
+ fprintf(stderr, "spe_program_load() failed\n");
+ exit(1);
+ }
+
+ pthread_create(&cell_global.spe_threads[i], NULL, &cell_thread_function,
+ &cell_global.inits[i]);
}
}
@@ -124,14 +137,15 @@ finish_all(uint num_spus)
uint i;
for (i = 0; i < num_spus; i++) {
- send_mbox_message(control_ps_area[i], CELL_CMD_FINISH);
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH);
}
for (i = 0; i < num_spus; i++) {
/* wait for mbox message */
unsigned k;
- while (_spe_out_mbox_status(control_ps_area[i]) < 1)
+
+ while (spe_out_mbox_read(cell_global.spe_contexts[i], &k, 1) < 1)
;
- k = _spe_out_mbox_read(control_ps_area[i]);
+
assert(k == CELL_CMD_FINISH);
}
}
@@ -150,16 +164,16 @@ test_spus(struct cell_context *cell)
sleep(2);
for (i = 0; i < cell->num_spus; i++) {
- command[i].fb.start = surf->map;
- command[i].fb.width = surf->width;
- command[i].fb.height = surf->height;
- command[i].fb.format = PIPE_FORMAT_A8R8G8B8_UNORM;
- send_mbox_message(control_ps_area[i], CELL_CMD_FRAMEBUFFER);
+ cell_global.command[i].fb.start = surf->map;
+ cell_global.command[i].fb.width = surf->width;
+ cell_global.command[i].fb.height = surf->height;
+ cell_global.command[i].fb.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FRAMEBUFFER);
}
for (i = 0; i < cell->num_spus; i++) {
- command[i].clear.value = 0xff880044; /* XXX */
- send_mbox_message(control_ps_area[i], CELL_CMD_CLEAR_TILES);
+ cell_global.command[i].clear.value = 0xff880044; /* XXX */
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_CLEAR_TILES);
}
finish_all(cell->num_spus);
@@ -171,21 +185,7 @@ test_spus(struct cell_context *cell)
}
for (i = 0; i < cell->num_spus; i++) {
- send_mbox_message(control_ps_area[i], CELL_CMD_INVERT_TILES);
- }
-
- finish_all(cell->num_spus);
-
- {
- uint *b = (uint*) surf->map;
- printf("PPU: Inverted results: 0x%x 0x%x 0x%x 0x%x\n",
- b[0], b[1000], b[2000], b[3000]);
- }
-
-
-
- for (i = 0; i < cell->num_spus; i++) {
- send_mbox_message(control_ps_area[i], CELL_CMD_EXIT);
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT);
}
}
@@ -196,10 +196,11 @@ test_spus(struct cell_context *cell)
void
wait_spus(uint num_spus)
{
- int i, status;
+ uint i;
+ void *value;
for (i = 0; i < num_spus; i++) {
- spe_wait( speid[i], &status, 1 );
+ pthread_join(cell_global.spe_threads[i], &value);
}
}
@@ -210,14 +211,11 @@ wait_spus(uint num_spus)
void
cell_spu_exit(struct cell_context *cell)
{
- uint i;
- int status;
+ unsigned i;
for (i = 0; i < cell->num_spus; i++) {
- send_mbox_message(control_ps_area[i], CELL_CMD_EXIT);
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT);
}
- for (i = 0; i < cell->num_spus; i++) {
- spe_wait( speid[i], &status, 1 );
- }
+ wait_spus(cell->num_spus);
}
diff --git a/src/mesa/pipe/cell/ppu/cell_spu.h b/src/mesa/pipe/cell/ppu/cell_spu.h
index dcbc72573f..612cb45c59 100644
--- a/src/mesa/pipe/cell/ppu/cell_spu.h
+++ b/src/mesa/pipe/cell/ppu/cell_spu.h
@@ -29,35 +29,46 @@
#define CELL_SPU
-#include <libspe.h>
+#include <libspe2.h>
#include <libmisc.h>
#include "pipe/cell/common.h"
#include "cell_context.h"
-#define MAX_SPUS 7
+#define MAX_SPUS 8
/**
- * SPU/SPE handles, etc
+ * Global vars, for now anyway.
*/
-extern spe_program_handle_t g3d_spu;
-extern speid_t speid[MAX_SPUS];
-extern spe_spu_control_area_t *control_ps_area[MAX_SPUS];
+struct cell_global_info
+{
+ /**
+ * SPU/SPE handles, etc
+ */
+ spe_context_ptr_t spe_contexts[MAX_SPUS];
+ pthread_t spe_threads[MAX_SPUS];
+ /**
+ * Data sent to SPUs
+ */
+ struct cell_init_info inits[MAX_SPUS];
+ struct cell_command command[MAX_SPUS];
+};
-/**
- * Data sent to SPUs
- */
-extern struct cell_init_info inits[MAX_SPUS] ALIGN16;
-extern struct cell_command command[MAX_SPUS] ALIGN16;
+extern struct cell_global_info cell_global;
-void
-send_mbox_message(spe_spu_control_area_t *ca, unsigned int msg);
-uint
-wait_mbox_message(spe_spu_control_area_t *ca);
+/** This is the handle for the actual SPE code */
+extern spe_program_handle_t g3d_spu;
+
+
+extern void
+send_mbox_message(spe_context_ptr_t ctx, unsigned int msg);
+
+extern uint
+wait_mbox_message(spe_context_ptr_t ctx);
void
diff --git a/src/mesa/pipe/cell/ppu/cell_state.h b/src/mesa/pipe/cell/ppu/cell_state.h
index 033767d29b..4bad45950b 100644
--- a/src/mesa/pipe/cell/ppu/cell_state.h
+++ b/src/mesa/pipe/cell/ppu/cell_state.h
@@ -93,9 +93,6 @@ void cell_set_constant_buffer(struct pipe_context *pipe,
void cell_set_polygon_stipple( struct pipe_context *,
const struct pipe_poly_stipple * );
-void cell_set_sampler_units( struct pipe_context *,
- uint numSamplers, const uint *units );
-
void cell_set_scissor_state( struct pipe_context *,
const struct pipe_scissor_state * );
diff --git a/src/mesa/pipe/cell/ppu/cell_state_sampler.c b/src/mesa/pipe/cell/ppu/cell_state_sampler.c
index c2a180ed30..1e7d4f08b8 100644
--- a/src/mesa/pipe/cell/ppu/cell_state_sampler.c
+++ b/src/mesa/pipe/cell/ppu/cell_state_sampler.c
@@ -66,35 +66,3 @@ cell_delete_sampler_state(struct pipe_context *pipe,
{
FREE( sampler );
}
-
-
-void
-cell_set_texture_state(struct pipe_context *pipe,
- unsigned unit,
- struct pipe_texture *texture)
-{
- struct cell_context *cell = cell_context(pipe);
-
- assert(unit < PIPE_MAX_SAMPLERS);
-
-#if 0
- cell->texture[unit] = cell_texture(texture); /* ptr, not struct */
- cell_tile_cache_set_texture(cell->tex_cache[unit], texture);
-#endif
-
- cell->dirty |= CELL_NEW_TEXTURE;
-}
-
-
-void
-cell_set_sampler_units(struct pipe_context *pipe,
- uint num_samplers, const uint *units )
-{
- struct cell_context *cell = cell_context(pipe);
- uint i;
- for (i = 0; i < num_samplers; i++)
- cell->sampler_units[i] = units[i];
- cell->dirty |= CELL_NEW_SAMPLER;
-}
-
-
diff --git a/src/mesa/pipe/cell/ppu/cell_surface.c b/src/mesa/pipe/cell/ppu/cell_surface.c
index 1692960244..185eeb26e8 100644
--- a/src/mesa/pipe/cell/ppu/cell_surface.c
+++ b/src/mesa/pipe/cell/ppu/cell_surface.c
@@ -41,28 +41,6 @@
#include "cell_spu.h"
-
-struct pipe_surface *
-cell_create_surface(int width, int height)
-{
-#if 0
- /* XXX total hack */
- struct pipe_surface *ps = CALLOC_STRUCT(pipe_surface);
-
- printf("cell_create_surface\n");
-
- ps->width = width;
- ps->height = height;
-
- ps->region = CALLOC_STRUCT(pipe_region);
- ps->region->map = align_malloc(width * height * 4, 16);
- return ps;
-#endif
- return NULL;
-}
-
-
-
void
cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
unsigned clearValue)
@@ -70,37 +48,54 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
struct cell_context *cell = cell_context(pipe);
uint i;
- printf("%s 0x%08x\n", __FUNCTION__, clearValue);
-
- {
- char s[100];
- pf_sprint_name(s, ps->format);
- printf("format = %s\n", s);
- }
-
if (!ps->map)
pipe_surface_map(ps);
for (i = 0; i < cell->num_spus; i++) {
- command[i].fb.start = ps->map;
- command[i].fb.width = ps->width;
- command[i].fb.height = ps->height;
- command[i].fb.format = ps->format;
- send_mbox_message(control_ps_area[i], CELL_CMD_FRAMEBUFFER);
+ struct cell_command_framebuffer *fb = &cell_global.command[i].fb;
+ fb->start = ps->map;
+ fb->width = ps->width;
+ fb->height = ps->height;
+ fb->format = ps->format;
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FRAMEBUFFER);
}
for (i = 0; i < cell->num_spus; i++) {
- command[i].clear.value = clearValue | (i << 21);
- send_mbox_message(control_ps_area[i], CELL_CMD_CLEAR_TILES);
+ /* XXX clear color varies per-SPU for debugging */
+ cell_global.command[i].clear.value = clearValue | (i << 21);
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_CLEAR_TILES);
}
-}
+#if 1
+ /* XXX Draw a test triangle over the cleared surface */
+ for (i = 0; i < cell->num_spus; i++) {
+ /* Same triangle data for all SPUs */
+ struct cell_command_triangle *tri = &cell_global.command[i].tri;
+ tri->vert[0][0] = 20.0;
+ tri->vert[0][1] = ps->height - 20;
-void
-cell_set_clear_color_state(struct pipe_context *pipe,
- const struct pipe_clear_color_state *clear)
-{
- struct cell_context *cell = cell_context(pipe);
+ tri->vert[1][0] = ps->width - 20.0;
+ tri->vert[1][1] = ps->height - 20;
- cell->clear_color = *clear; /* struct copy */
+ tri->vert[2][0] = ps->width / 2;
+ tri->vert[2][1] = 20.0;
+
+ tri->color[0][0] = 1.0;
+ tri->color[0][1] = 0.0;
+ tri->color[0][2] = 0.0;
+ tri->color[0][3] = 0.0;
+
+ tri->color[1][0] = 0.0;
+ tri->color[1][1] = 1.0;
+ tri->color[1][2] = 0.0;
+ tri->color[1][3] = 0.0;
+
+ tri->color[2][0] = 0.0;
+ tri->color[2][1] = 0.0;
+ tri->color[2][2] = 1.0;
+ tri->color[2][3] = 0.0;
+
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_TRIANGLE);
+ }
+#endif
}
diff --git a/src/mesa/pipe/cell/ppu/cell_surface.h b/src/mesa/pipe/cell/ppu/cell_surface.h
index 8b42ba02d5..4c69c4c89f 100644
--- a/src/mesa/pipe/cell/ppu/cell_surface.h
+++ b/src/mesa/pipe/cell/ppu/cell_surface.h
@@ -26,24 +26,18 @@
**************************************************************************/
-#ifndef CELL_SURFACE
-#define CELL_SURFACE
+#ifndef CELL_SURFACE_H
+#define CELL_SURFACE_H
-#include "pipe/p_state.h"
+struct pipe_context;
+struct pipe_surface;
-extern struct pipe_surface *
-cell_create_surface(int width, int height);
-
extern void
cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
unsigned clearValue);
-extern void
-cell_set_clear_color_state(struct pipe_context *pipe,
- const struct pipe_clear_color_state *clear);
-
-#endif /* CELL_SURFACE */
+#endif /* CELL_SURFACE_H */
diff --git a/src/mesa/pipe/cell/spu/main.c b/src/mesa/pipe/cell/spu/main.c
index 226d81b4ca..cc5eddb0f5 100644
--- a/src/mesa/pipe/cell/spu/main.c
+++ b/src/mesa/pipe/cell/spu/main.c
@@ -34,6 +34,7 @@
#include <libmisc.h>
#include <spu_mfcio.h>
+#include "main.h"
#include "tri.h"
#include "pipe/cell/common.h"
@@ -43,21 +44,15 @@ helpful headers:
/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
*/
-static struct cell_init_info init;
+volatile struct cell_init_info init;
-struct framebuffer {
- void *start;
- uint width, height;
- uint width_tiles, height_tiles; /**< width and height in tiles */
-};
-static struct framebuffer fb;
+struct framebuffer fb;
+int DefaultTag;
-static int DefaultTag = 1;
-
-static inline void
+void
wait_on_mask(unsigned tag)
{
mfc_write_tag_mask( tag );
@@ -66,7 +61,7 @@ wait_on_mask(unsigned tag)
-static void
+void
get_tile(const struct framebuffer *fb, uint tx, uint ty, uint *tile)
{
uint offset = ty * fb->width_tiles + tx;
@@ -89,7 +84,7 @@ get_tile(const struct framebuffer *fb, uint tx, uint ty, uint *tile)
0 /* rid */);
}
-static void
+void
put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile)
{
uint offset = ty * fb->width_tiles + tx;
@@ -119,7 +114,7 @@ clear_tiles(const struct cell_command_clear_tiles *clear)
{
uint num_tiles = fb.width_tiles * fb.height_tiles;
uint i;
- uint tile[TILE_SIZE * TILE_SIZE] ALIGN16;
+ uint tile[TILE_SIZE * TILE_SIZE] ALIGN16_ATTRIB;
for (i = 0; i < TILE_SIZE * TILE_SIZE; i++)
tile[i] = clear->value;
@@ -136,32 +131,29 @@ clear_tiles(const struct cell_command_clear_tiles *clear)
}
-/** Invert all pixels in all tiles */
static void
-invert_tiles(void)
+triangle(const struct cell_command_triangle *tri)
{
uint num_tiles = fb.width_tiles * fb.height_tiles;
- uint i, j;
- uint tile[TILE_SIZE * TILE_SIZE] ALIGN16;
+ struct prim_header prim;
+ uint i;
+
+ COPY_4V(prim.v[0].data[0], tri->vert[0]);
+ COPY_4V(prim.v[1].data[0], tri->vert[1]);
+ COPY_4V(prim.v[2].data[0], tri->vert[2]);
+
+ COPY_4V(prim.v[0].data[1], tri->color[0]);
+ COPY_4V(prim.v[1].data[1], tri->color[1]);
+ COPY_4V(prim.v[2].data[1], tri->color[2]);
for (i = init.id; i < num_tiles; i += init.num_spus) {
uint tx = i % fb.width_tiles;
uint ty = i / fb.width_tiles;
-
- get_tile(&fb, tx, ty, tile);
- wait_on_mask(1 << DefaultTag);
-
- for (j = 0; j < TILE_SIZE * TILE_SIZE; j++) {
- tile[j] = ~tile[j];
- }
-
- put_tile(&fb, tx, ty, tile);
+ draw_triangle(&prim, tx, ty);
}
}
-struct cell_command cmd ALIGN16;
-
/**
* Temporary/simple main loop for SPEs: Get a command, execute it, repeat.
@@ -169,7 +161,9 @@ struct cell_command cmd ALIGN16;
static void
main_loop(void)
{
+ struct cell_command cmd;
int exitFlag = 0;
+
printf("SPU %u: Enter main loop\n", init.id);
assert((sizeof(struct cell_command) & 0xf) == 0);
@@ -207,17 +201,19 @@ main_loop(void)
cmd.fb.start);
fb.width = cmd.fb.width;
fb.height = cmd.fb.height;
- fb.width_tiles = fb.width / TILE_SIZE;
- fb.height_tiles = fb.height / TILE_SIZE;
+ fb.width_tiles = (fb.width + TILE_SIZE - 1) / TILE_SIZE;
+ fb.height_tiles = (fb.height + TILE_SIZE - 1) / TILE_SIZE;
+ printf("SPU %u: %u x %u tiles\n",
+ init.id, fb.width_tiles, fb.height_tiles);
fb.start = cmd.fb.start;
break;
case CELL_CMD_CLEAR_TILES:
printf("SPU %u: CLEAR to 0x%08x\n", init.id, cmd.clear.value);
clear_tiles(&cmd.clear);
break;
- case CELL_CMD_INVERT_TILES:
- printf("SPU %u: INVERT_TILES\n", init.id);
- invert_tiles();
+ case CELL_CMD_TRIANGLE:
+ printf("SPU %u: TRIANGLE\n", init.id);
+ triangle(&cmd.tri);
break;
case CELL_CMD_FINISH:
printf("SPU %u: FINISH\n", init.id);
@@ -238,15 +234,21 @@ main_loop(void)
+/**
+ * SPE entrypoint.
+ * Note: example programs declare params as 'unsigned long long' but
+ * that doesn't work.
+ */
int
-main(unsigned long long speid,
- unsigned long long argp,
- unsigned long long envp)
+main(unsigned long speid, unsigned long argp)
{
int tag = 0;
(void) speid;
- (void) envp;
+
+ DefaultTag = 1;
+
+ printf("SPU: main() speid=%lu\n", speid);
mfc_get(&init, /* dest */
(unsigned int) argp, /* src */
@@ -256,6 +258,7 @@ main(unsigned long long speid,
0 /* rid */);
wait_on_mask( 1 << tag );
+
main_loop();
return 0;
diff --git a/src/mesa/state_tracker/st_atom_clear_color.c b/src/mesa/pipe/cell/spu/main.h
index ce3431c5d3..8c2796387f 100644
--- a/src/mesa/state_tracker/st_atom_clear_color.c
+++ b/src/mesa/pipe/cell/spu/main.h
@@ -25,39 +25,37 @@
*
**************************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Brian Paul
- */
-
-#include "st_context.h"
-#include "st_atom.h"
-#include "pipe/p_context.h"
-
-
-static void
-update_clear_color_state( struct st_context *st )
-{
- struct pipe_clear_color_state clear;
-
- clear.color[0] = st->ctx->Color.ClearColor[0];
- clear.color[1] = st->ctx->Color.ClearColor[1];
- clear.color[2] = st->ctx->Color.ClearColor[2];
- clear.color[3] = st->ctx->Color.ClearColor[3];
-
- if (memcmp(&clear, &st->state.clear_color, sizeof(clear)) != 0) {
- st->state.clear_color = clear;
- st->pipe->set_clear_color_state( st->pipe, &clear );
- }
-}
-
-
-const struct st_tracked_state st_update_clear_color = {
- .name = "st_update_clear_color",
- .dirty = {
- .mesa = _NEW_COLOR,
- .st = 0,
- },
- .update = update_clear_color_state
+#ifndef MAIN_H
+#define MAIN_H
+
+
+#include <libmisc.h>
+#include <spu_mfcio.h>
+#include "pipe/cell/common.h"
+
+
+extern volatile struct cell_init_info init;
+
+struct framebuffer {
+ void *start;
+ uint width, height;
+ uint width_tiles, height_tiles; /**< width and height in tiles */
};
+
+extern struct framebuffer fb;
+
+
+extern int DefaultTag;
+
+
+void
+wait_on_mask(unsigned tag);
+
+void
+get_tile(const struct framebuffer *fb, uint tx, uint ty, uint *tile);
+
+void
+put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile);
+
+
+#endif /* MAIN_H */
diff --git a/src/mesa/pipe/cell/spu/tri.c b/src/mesa/pipe/cell/spu/tri.c
index 949c3b4c8e..90452f14ba 100644
--- a/src/mesa/pipe/cell/spu/tri.c
+++ b/src/mesa/pipe/cell/spu/tri.c
@@ -1,9 +1,880 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/**
+ * Triangle rendering within a tile.
+ */
+
+
+#if 0
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+#include "sp_prim_setup.h"
+#include "pipe/draw/draw_private.h"
+#include "pipe/draw/draw_vertex.h"
+#include "pipe/p_util.h"
+#endif
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_util.h"
+#include "main.h"
#include "tri.h"
+/*
+#include <vmx2spu.h>
+#include <spu_internals.h>
+*/
+
+
+#if 1
+
+/* XXX fix this */
+#undef CEILF
+#define CEILF(X) ((float) (int) ((X) + 0.99999))
+
+
+#define QUAD_TOP_LEFT 0
+#define QUAD_TOP_RIGHT 1
+#define QUAD_BOTTOM_LEFT 2
+#define QUAD_BOTTOM_RIGHT 3
+#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
+#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
+#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
+#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
+#define MASK_ALL 0xf
+
+#define PIPE_MAX_SHADER_INPUTS 8 /* XXX temp */
+
+static int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
+
+static uint tile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB;
+
+#endif
+
+
+#define DEBUG_VERTS 0
+
+/**
+ * Triangle edge info
+ */
+struct edge {
+ float dx; /**< X(v1) - X(v0), used only during setup */
+ float dy; /**< Y(v1) - Y(v0), used only during setup */
+ float dxdy; /**< dx/dy */
+ float sx, sy; /**< first sample point coord */
+ int lines; /**< number of lines on this edge */
+};
+
+
+struct interp_coef
+{
+ float a0[4];
+ float dadx[4];
+ float dady[4];
+};
+
+/**
+ * Triangle setup info (derived from draw_stage).
+ * Also used for line drawing (taking some liberties).
+ */
+struct setup_stage {
+#if 0
+ struct draw_stage stage; /**< This must be first (base class) */
+
+ struct softpipe_context *softpipe;
+#endif
+
+ /* Vertices are just an array of floats making up each attribute in
+ * turn. Currently fixed at 4 floats, but should change in time.
+ * Codegen will help cope with this.
+ */
+ const struct vertex_header *vmax;
+ const struct vertex_header *vmid;
+ const struct vertex_header *vmin;
+ const struct vertex_header *vprovoke;
+
+ struct edge ebot;
+ struct edge etop;
+ struct edge emaj;
+
+ float oneoverarea;
+
+#if 0
+ struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
+#else
+ struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
+#endif
+
+#if 0
+ struct quad_header quad;
+#endif
+#if 1
+ uint color;
+#endif
+
+ struct {
+ int left[2]; /**< [0] = row0, [1] = row1 */
+ int right[2];
+ int y;
+ unsigned y_flags;
+ unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
+ } span;
+};
+
+
+#if 0
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
+{
+ return (struct setup_stage *)stage;
+}
+#endif
+
+#if 0
+/**
+ * Clip setup->quad against the scissor/surface bounds.
+ */
+static INLINE void
+quad_clip(struct setup_stage *setup)
+{
+ const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
+ const int minx = (int) cliprect->minx;
+ const int maxx = (int) cliprect->maxx;
+ const int miny = (int) cliprect->miny;
+ const int maxy = (int) cliprect->maxy;
+
+ if (setup->quad.x0 >= maxx ||
+ setup->quad.y0 >= maxy ||
+ setup->quad.x0 + 1 < minx ||
+ setup->quad.y0 + 1 < miny) {
+ /* totally clipped */
+ setup->quad.mask = 0x0;
+ return;
+ }
+ if (setup->quad.x0 < minx)
+ setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
+ if (setup->quad.y0 < miny)
+ setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
+ if (setup->quad.x0 == maxx - 1)
+ setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
+ if (setup->quad.y0 == maxy - 1)
+ setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
+}
+#endif
+
+#if 0
+/**
+ * Emit a quad (pass to next stage) with clipping.
+ */
+static INLINE void
+clip_emit_quad(struct setup_stage *setup)
+{
+ quad_clip(setup);
+ if (setup->quad.mask) {
+ struct softpipe_context *sp = setup->softpipe;
+ sp->quad.first->run(sp->quad.first, &setup->quad);
+ }
+}
+#endif
+
+/**
+ * Evaluate attribute coefficients (plane equations) to compute
+ * attribute values for the four fragments in a quad.
+ * Eg: four colors will be compute.
+ */
+static INLINE void
+eval_coeff( struct setup_stage *setup, uint slot,
+ float x, float y, float result[4][4])
+{
+ uint i;
+ const float *dadx = setup->coef[slot].dadx;
+ const float *dady = setup->coef[slot].dady;
+
+ /* loop over XYZW comps */
+ for (i = 0; i < 4; i++) {
+ result[QUAD_TOP_LEFT][i] = setup->coef[slot].a0[i] + x * dadx[i] + y * dady[i];
+ result[QUAD_TOP_RIGHT][i] = result[0][i] + dadx[i];
+ result[QUAD_BOTTOM_LEFT][i] = result[0][i] + dady[i];
+ result[QUAD_BOTTOM_RIGHT][i] = result[0][i] + dadx[i] + dady[i];
+ }
+}
+
+
+static INLINE uint
+pack_color(const float color[4])
+{
+ uint r = (uint) (color[0] * 255.0);
+ uint g = (uint) (color[1] * 255.0);
+ uint b = (uint) (color[2] * 255.0);
+ uint a = (uint) (color[3] * 255.0);
+ uint icolor = (b << 24) | (g << 16) | (r << 8) | a;
+ return icolor;
+}
+
+
+/**
+ * Emit a quad (pass to next stage). No clipping is done.
+ */
+static INLINE void
+emit_quad( struct setup_stage *setup, int x, int y, unsigned mask )
+{
+#if 0
+ struct softpipe_context *sp = setup->softpipe;
+ setup->quad.x0 = x;
+ setup->quad.y0 = y;
+ setup->quad.mask = mask;
+ sp->quad.first->run(sp->quad.first, &setup->quad);
+#else
+ /* Cell: "write" quad fragments to the tile by setting prim color */
+ int ix = x - cliprect_minx;
+ int iy = y - cliprect_miny;
+ float colors[4][4];
+
+ eval_coeff(setup, 1, (float) x, (float) y, colors);
+
+ if (mask & MASK_TOP_LEFT)
+ tile[iy][ix] = pack_color(colors[QUAD_TOP_LEFT]);
+ if (mask & MASK_TOP_RIGHT)
+ tile[iy][ix+1] = pack_color(colors[QUAD_TOP_RIGHT]);
+ if (mask & MASK_BOTTOM_LEFT)
+ tile[iy+1][ix] = pack_color(colors[QUAD_BOTTOM_LEFT]);
+ if (mask & MASK_BOTTOM_RIGHT)
+ tile[iy+1][ix+1] = pack_color(colors[QUAD_BOTTOM_RIGHT]);
+#endif
+}
+
+
+/**
+ * Given an X or Y coordinate, return the block/quad coordinate that it
+ * belongs to.
+ */
+static INLINE int block( int x )
+{
+ return x & ~1;
+}
+
+
+/**
+ * Compute mask which indicates which pixels in the 2x2 quad are actually inside
+ * the triangle's bounds.
+ *
+ * this is pretty nasty... may need to rework flush_spans again to
+ * fix it, if possible.
+ */
+static unsigned calculate_mask( struct setup_stage *setup, int x )
+{
+ unsigned mask = 0x0;
+
+ if (x >= setup->span.left[0] && x < setup->span.right[0])
+ mask |= MASK_TOP_LEFT;
+
+ if (x >= setup->span.left[1] && x < setup->span.right[1])
+ mask |= MASK_BOTTOM_LEFT;
+
+ if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0])
+ mask |= MASK_TOP_RIGHT;
+
+ if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1])
+ mask |= MASK_BOTTOM_RIGHT;
+
+ return mask;
+}
+
+
+/**
+ * Render a horizontal span of quads
+ */
+static void flush_spans( struct setup_stage *setup )
+{
+ int minleft, maxright;
+ int x;
+
+ switch (setup->span.y_flags) {
+ case 0x3:
+ /* both odd and even lines written (both quad rows) */
+ minleft = MIN2(setup->span.left[0], setup->span.left[1]);
+ maxright = MAX2(setup->span.right[0], setup->span.right[1]);
+ break;
+
+ case 0x1:
+ /* only even line written (quad top row) */
+ minleft = setup->span.left[0];
+ maxright = setup->span.right[0];
+ break;
+
+ case 0x2:
+ /* only odd line written (quad bottom row) */
+ minleft = setup->span.left[1];
+ maxright = setup->span.right[1];
+ break;
+
+ default:
+ return;
+ }
+
+ /* XXX this loop could be moved into the above switch cases and
+ * calculate_mask() could be simplified a bit...
+ */
+ for (x = block(minleft); x <= block(maxright); x += 2) {
+ emit_quad( setup, x, setup->span.y,
+ calculate_mask( setup, x ) );
+ }
+
+ setup->span.y = 0;
+ setup->span.y_flags = 0;
+ setup->span.right[0] = 0;
+ setup->span.right[1] = 0;
+}
+
+#if DEBUG_VERTS
+static void print_vertex(const struct setup_stage *setup,
+ const struct vertex_header *v)
+{
+ int i;
+ fprintf(stderr, "Vertex: (%p)\n", v);
+ for (i = 0; i < setup->quad.nr_attrs; i++) {
+ fprintf(stderr, " %d: %f %f %f %f\n", i,
+ v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
+ }
+}
+#endif
+
+static boolean setup_sort_vertices( struct setup_stage *setup,
+ const struct prim_header *prim )
+{
+#if 0
+ const struct vertex_header *v0 = prim->v[0];
+ const struct vertex_header *v1 = prim->v[1];
+ const struct vertex_header *v2 = prim->v[2];
+#else
+ const struct vertex_header *v0 = &prim->v[0];
+ const struct vertex_header *v1 = &prim->v[1];
+ const struct vertex_header *v2 = &prim->v[2];
+#endif
+
+#if DEBUG_VERTS
+ fprintf(stderr, "Triangle:\n");
+ print_vertex(setup, v0);
+ print_vertex(setup, v1);
+ print_vertex(setup, v2);
+#endif
+
+ setup->vprovoke = v2;
+
+ /* determine bottom to top order of vertices */
+ {
+ float y0 = v0->data[0][1];
+ float y1 = v1->data[0][1];
+ float y2 = v2->data[0][1];
+ if (y0 <= y1) {
+ if (y1 <= y2) {
+ /* y0<=y1<=y2 */
+ setup->vmin = v0;
+ setup->vmid = v1;
+ setup->vmax = v2;
+ }
+ else if (y2 <= y0) {
+ /* y2<=y0<=y1 */
+ setup->vmin = v2;
+ setup->vmid = v0;
+ setup->vmax = v1;
+ }
+ else {
+ /* y0<=y2<=y1 */
+ setup->vmin = v0;
+ setup->vmid = v2;
+ setup->vmax = v1;
+ }
+ }
+ else {
+ if (y0 <= y2) {
+ /* y1<=y0<=y2 */
+ setup->vmin = v1;
+ setup->vmid = v0;
+ setup->vmax = v2;
+ }
+ else if (y2 <= y1) {
+ /* y2<=y1<=y0 */
+ setup->vmin = v2;
+ setup->vmid = v1;
+ setup->vmax = v0;
+ }
+ else {
+ /* y1<=y2<=y0 */
+ setup->vmin = v1;
+ setup->vmid = v2;
+ setup->vmax = v0;
+ }
+ }
+ }
+
+ setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0];
+ setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1];
+ setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0];
+ setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1];
+ setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0];
+ setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1];
+
+ /*
+ * Compute triangle's area. Use 1/area to compute partial
+ * derivatives of attributes later.
+ *
+ * The area will be the same as prim->det, but the sign may be
+ * different depending on how the vertices get sorted above.
+ *
+ * To determine whether the primitive is front or back facing we
+ * use the prim->det value because its sign is correct.
+ */
+ {
+ const float area = (setup->emaj.dx * setup->ebot.dy -
+ setup->ebot.dx * setup->emaj.dy);
+
+ setup->oneoverarea = 1.0f / area;
+ /*
+ _mesa_printf("%s one-over-area %f area %f det %f\n",
+ __FUNCTION__, setup->oneoverarea, area, prim->det );
+ */
+ }
+
+#if 0
+ /* We need to know if this is a front or back-facing triangle for:
+ * - the GLSL gl_FrontFacing fragment attribute (bool)
+ * - two-sided stencil test
+ */
+ setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
+#endif
+
+ return TRUE;
+}
+
+
+#if 0
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ * The value value comes from vertex->data[slot][i].
+ * The result will be put into setup->coef[slot].a0[i].
+ * \param slot which attribute slot
+ * \param i which component of the slot (0..3)
+ */
+static void const_coeff( struct setup_stage *setup,
+ unsigned slot,
+ unsigned i )
+{
+ assert(slot < PIPE_MAX_SHADER_INPUTS);
+ assert(i <= 3);
+
+ setup->coef[slot].dadx[i] = 0;
+ setup->coef[slot].dady[i] = 0;
+
+ /* need provoking vertex info!
+ */
+ setup->coef[slot].a0[i] = setup->vprovoke->data[slot][i];
+}
+#endif
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a triangle.
+ */
+static void tri_linear_coeff( struct setup_stage *setup,
+ unsigned slot )
+{
+ uint i;
+ for (i = 0; i < 4; i++) {
+ float botda = setup->vmid->data[slot][i] - setup->vmin->data[slot][i];
+ float majda = setup->vmax->data[slot][i] - setup->vmin->data[slot][i];
+ float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
+ float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
+
+ assert(slot < PIPE_MAX_SHADER_INPUTS);
+
+ setup->coef[slot].dadx[i] = a * setup->oneoverarea;
+ setup->coef[slot].dady[i] = b * setup->oneoverarea;
+
+ /* calculate a0 as the value which would be sampled for the
+ * fragment at (0,0), taking into account that we want to sample at
+ * pixel centers, in other words (0.5, 0.5).
+ *
+ * this is neat but unfortunately not a good way to do things for
+ * triangles with very large values of dadx or dady as it will
+ * result in the subtraction and re-addition from a0 of a very
+ * large number, which means we'll end up loosing a lot of the
+ * fractional bits and precision from a0. the way to fix this is
+ * to define a0 as the sample at a pixel center somewhere near vmin
+ * instead - i'll switch to this later.
+ */
+ setup->coef[slot].a0[i] = (setup->vmin->data[slot][i] -
+ (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) +
+ setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f)));
+ }
+
+ /*
+ _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
+ slot, "xyzw"[i],
+ setup->coef[slot].a0[i],
+ setup->coef[slot].dadx[i],
+ setup->coef[slot].dady[i]);
+ */
+}
+
+
+#if 0
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void tri_persp_coeff( struct setup_stage *setup,
+ unsigned slot,
+ unsigned i )
+{
+ /* premultiply by 1/w:
+ */
+ float mina = setup->vmin->data[slot][i] * setup->vmin->data[0][3];
+ float mida = setup->vmid->data[slot][i] * setup->vmid->data[0][3];
+ float maxa = setup->vmax->data[slot][i] * setup->vmax->data[0][3];
+
+ float botda = mida - mina;
+ float majda = maxa - mina;
+ float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
+ float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
+
+ /*
+ printf("tri persp %d,%d: %f %f %f\n", slot, i,
+ setup->vmin->data[slot][i],
+ setup->vmid->data[slot][i],
+ setup->vmax->data[slot][i]
+ );
+ */
+
+ assert(slot < PIPE_MAX_SHADER_INPUTS);
+ assert(i <= 3);
+
+ setup->coef[slot].dadx[i] = a * setup->oneoverarea;
+ setup->coef[slot].dady[i] = b * setup->oneoverarea;
+ setup->coef[slot].a0[i] = (mina -
+ (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) +
+ setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f)));
+}
+#endif
+
+
+/**
+ * Compute the setup->coef[] array dadx, dady, a0 values.
+ * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
+ */
+static void setup_tri_coefficients( struct setup_stage *setup )
+{
+#if 0
+ const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode;
+ unsigned slot, j;
+
+ /* z and w are done by linear interpolation:
+ */
+ tri_linear_coeff(setup, 0, 2);
+ tri_linear_coeff(setup, 0, 3);
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 1; slot < setup->quad.nr_attrs; slot++) {
+ switch (interp[slot]) {
+ case INTERP_CONSTANT:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ const_coeff(setup, slot, j);
+ break;
+
+ case INTERP_LINEAR:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ tri_linear_coeff(setup, slot, j);
+ break;
+
+ case INTERP_PERSPECTIVE:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ tri_persp_coeff(setup, slot, j);
+ break;
+
+ default:
+ /* invalid interp mode */
+ assert(0);
+ }
+ }
+#else
+ tri_linear_coeff(setup, 1); /* slot 1 = color */
+#endif
+}
+
+
+static void setup_tri_edges( struct setup_stage *setup )
+{
+ float vmin_x = setup->vmin->data[0][0] + 0.5f;
+ float vmid_x = setup->vmid->data[0][0] + 0.5f;
+
+ float vmin_y = setup->vmin->data[0][1] - 0.5f;
+ float vmid_y = setup->vmid->data[0][1] - 0.5f;
+ float vmax_y = setup->vmax->data[0][1] - 0.5f;
+
+ setup->emaj.sy = CEILF(vmin_y);
+ setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy);
+ setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
+ setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
+
+ setup->etop.sy = CEILF(vmid_y);
+ setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy);
+ setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
+ setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
+
+ setup->ebot.sy = CEILF(vmin_y);
+ setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy);
+ setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
+ setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
+}
+
+
+/**
+ * Render the upper or lower half of a triangle.
+ * Scissoring/cliprect is applied here too.
+ */
+static void subtriangle( struct setup_stage *setup,
+ struct edge *eleft,
+ struct edge *eright,
+ unsigned lines )
+{
+#if 0
+ const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
+ const int minx = (int) cliprect->minx;
+ const int maxx = (int) cliprect->maxx;
+ const int miny = (int) cliprect->miny;
+ const int maxy = (int) cliprect->maxy;
+#else
+ const int minx = cliprect_minx;
+ const int maxx = cliprect_maxx;
+ const int miny = cliprect_miny;
+ const int maxy = cliprect_maxy;
+#endif
+ int y, start_y, finish_y;
+ int sy = (int)eleft->sy;
+
+ assert((int)eleft->sy == (int) eright->sy);
+
+ /* clip top/bottom */
+ start_y = sy;
+ finish_y = sy + lines;
+
+ if (start_y < miny)
+ start_y = miny;
+
+ if (finish_y > maxy)
+ finish_y = maxy;
+
+ start_y -= sy;
+ finish_y -= sy;
+
+ /*
+ _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
+ */
+
+ for (y = start_y; y < finish_y; y++) {
+
+ /* avoid accumulating adds as floats don't have the precision to
+ * accurately iterate large triangle edges that way. luckily we
+ * can just multiply these days.
+ *
+ * this is all drowned out by the attribute interpolation anyway.
+ */
+ int left = (int)(eleft->sx + y * eleft->dxdy);
+ int right = (int)(eright->sx + y * eright->dxdy);
+
+ /* clip left/right */
+ if (left < minx)
+ left = minx;
+ if (right > maxx)
+ right = maxx;
+
+ if (left < right) {
+ int _y = sy + y;
+ if (block(_y) != setup->span.y) {
+ flush_spans(setup);
+ setup->span.y = block(_y);
+ }
+
+ setup->span.left[_y&1] = left;
+ setup->span.right[_y&1] = right;
+ setup->span.y_flags |= 1<<(_y&1);
+ }
+ }
+
+
+ /* save the values so that emaj can be restarted:
+ */
+ eleft->sx += lines * eleft->dxdy;
+ eright->sx += lines * eright->dxdy;
+ eleft->sy += lines;
+ eright->sy += lines;
+}
+
+
+/**
+ * Do setup for triangle rasterization, then render the triangle.
+ */
+static void setup_tri(
+#if 0
+ struct draw_stage *stage,
+#endif
+ struct prim_header *prim )
+{
+#if 0
+ struct setup_stage *setup = setup_stage( stage );
+#else
+ struct setup_stage ss;
+ struct setup_stage *setup = &ss;
+ ss.color = prim->color;
+#endif
+
+ /*
+ _mesa_printf("%s\n", __FUNCTION__ );
+ */
+
+ setup_sort_vertices( setup, prim );
+ setup_tri_coefficients( setup );
+ setup_tri_edges( setup );
+
+#if 0
+ setup->quad.prim = PRIM_TRI;
+#endif
+
+ setup->span.y = 0;
+ setup->span.y_flags = 0;
+ setup->span.right[0] = 0;
+ setup->span.right[1] = 0;
+ /* setup->span.z_mode = tri_z_mode( setup->ctx ); */
+
+ /* init_constant_attribs( setup ); */
+
+ if (setup->oneoverarea < 0.0) {
+ /* emaj on left:
+ */
+ subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
+ subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
+ }
+ else {
+ /* emaj on right:
+ */
+ subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
+ subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
+ }
+
+ flush_spans( setup );
+}
+
+
+
+
+#if 0
+static void setup_begin( struct draw_stage *stage )
+{
+ struct setup_stage *setup = setup_stage(stage);
+ struct softpipe_context *sp = setup->softpipe;
+
+ setup->quad.nr_attrs = setup->softpipe->nr_frag_attrs;
+
+ sp->quad.first->begin(sp->quad.first);
+}
+#endif
+
+#if 0
+static void setup_end( struct draw_stage *stage )
+{
+}
+#endif
+
+
+#if 0
+static void reset_stipple_counter( struct draw_stage *stage )
+{
+ struct setup_stage *setup = setup_stage(stage);
+ setup->softpipe->line_stipple_counter = 0;
+}
+#endif
+
+#if 0
+static void render_destroy( struct draw_stage *stage )
+{
+ FREE( stage );
+}
+#endif
+
+
+#if 0
+/**
+ * Create a new primitive setup/render stage.
+ */
+struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe )
+{
+ struct setup_stage *setup = CALLOC_STRUCT(setup_stage);
+
+ setup->softpipe = softpipe;
+ setup->stage.draw = softpipe->draw;
+ setup->stage.begin = setup_begin;
+ setup->stage.point = setup_point;
+ setup->stage.line = setup_line;
+ setup->stage.tri = setup_tri;
+ setup->stage.end = setup_end;
+ setup->stage.reset_stipple_counter = reset_stipple_counter;
+ setup->stage.destroy = render_destroy;
+
+ setup->quad.coef = setup->coef;
+
+ return &setup->stage;
+}
+#endif
+
+
void
-draw_triangle(int v1, int v2, int v3)
+draw_triangle(struct prim_header *tri, uint tx, uint ty)
{
+ /* set clipping bounds to tile bounds */
+ cliprect_minx = tx * TILE_SIZE;
+ cliprect_miny = ty * TILE_SIZE;
+ cliprect_maxx = (tx + 1) * TILE_SIZE;
+ cliprect_maxy = (ty + 1) * TILE_SIZE;
+
+ get_tile(&fb, tx, ty, (uint *) tile);
+ wait_on_mask(1 << DefaultTag);
+ setup_tri(tri);
+ put_tile(&fb, tx, ty, (uint *) tile);
+ wait_on_mask(1 << DefaultTag);
}
diff --git a/src/mesa/pipe/cell/spu/tri.h b/src/mesa/pipe/cell/spu/tri.h
index 6a915de60d..dc66ad0f47 100644
--- a/src/mesa/pipe/cell/spu/tri.h
+++ b/src/mesa/pipe/cell/spu/tri.h
@@ -1,4 +1,52 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef TRI_H
+#define TRI_H
+
+
+/**
+ * Simplified types taken from other parts of Gallium
+ */
+
+struct vertex_header {
+ float data[2][4]; /* pos and color */
+};
+
+
+struct prim_header {
+ struct vertex_header v[3];
+ uint color;
+};
extern void
-draw_triangle(int v1, int v2, int v3);
+draw_triangle(struct prim_header *tri, uint tx, uint ty);
+
+
+#endif /* TRI_H */
diff --git a/src/mesa/pipe/draw/draw_context.c b/src/mesa/pipe/draw/draw_context.c
index 33727e6547..179f7ed0d6 100644
--- a/src/mesa/pipe/draw/draw_context.c
+++ b/src/mesa/pipe/draw/draw_context.c
@@ -55,7 +55,6 @@ struct draw_context *draw_create( void )
draw->pipeline.clip = draw_clip_stage( draw );
draw->pipeline.flatshade = draw_flatshade_stage( draw );
draw->pipeline.cull = draw_cull_stage( draw );
- draw->pipeline.feedback = draw_feedback_stage( draw );
draw->pipeline.validate = draw_validate_stage( draw );
draw->pipeline.first = draw->pipeline.validate;
@@ -100,7 +99,6 @@ void draw_destroy( struct draw_context *draw )
draw->pipeline.clip->destroy( draw->pipeline.clip );
draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
draw->pipeline.cull->destroy( draw->pipeline.cull );
- draw->pipeline.feedback->destroy( draw->pipeline.feedback );
draw->pipeline.validate->destroy( draw->pipeline.validate );
if (draw->pipeline.rasterize)
draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
@@ -117,13 +115,6 @@ void draw_flush( struct draw_context *draw )
}
-void draw_set_feedback_state( struct draw_context *draw,
- const struct pipe_feedback_state *feedback )
-{
- draw_flush( draw );
- draw->feedback = *feedback;
-}
-
/**
* Register new primitive rasterization/rendering state.
@@ -223,18 +214,6 @@ draw_set_mapped_constant_buffer(struct draw_context *draw,
}
-void
-draw_set_mapped_feedback_buffer(struct draw_context *draw, uint index,
- void *buffer, uint size)
-{
- draw_flush( draw );
-
- assert(index < PIPE_MAX_FEEDBACK_ATTRIBS);
- draw->user.feedback_buffer[index] = buffer;
- draw->user.feedback_buffer_size[index] = size; /* in bytes */
-}
-
-
diff --git a/src/mesa/pipe/draw/draw_context.h b/src/mesa/pipe/draw/draw_context.h
index 8e2232244c..6dc6e4ce82 100644
--- a/src/mesa/pipe/draw/draw_context.h
+++ b/src/mesa/pipe/draw/draw_context.h
@@ -82,9 +82,6 @@ void draw_set_viewport_state( struct draw_context *draw,
void draw_set_clip_state( struct draw_context *pipe,
const struct pipe_clip_state *clip );
-void draw_set_feedback_state( struct draw_context *draw,
- const struct pipe_feedback_state * );
-
void draw_set_rasterizer_state( struct draw_context *draw,
const struct pipe_rasterizer_state *raster );
@@ -120,9 +117,6 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw,
void draw_set_mapped_constant_buffer(struct draw_context *draw,
const void *buffer);
-void
-draw_set_mapped_feedback_buffer(struct draw_context *draw, uint index,
- void *buffer, uint size);
/***********************************************************************
diff --git a/src/mesa/pipe/draw/draw_feedback.c b/src/mesa/pipe/draw/draw_feedback.c
deleted file mode 100644
index aea6a8184c..0000000000
--- a/src/mesa/pipe/draw/draw_feedback.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Primitive/vertex feedback (and/or discard) stage.
- * Used to implement transformation feedback/streaming and other things
- * which require a post-transformed vertex position (such as rasterpos,
- * selection and feedback modes).
- *
- * Authors:
- * Brian Paul
- */
-
-
-#include "pipe/p_util.h"
-#include "draw_private.h"
-
-
-struct feedback_stage {
- struct draw_stage stage; /**< base class */
- uint num_prim_generated; /**< number of primitives received */
- uint num_prim_emitted; /**< number of primitives fed back */
- uint num_vert_emitted; /**< number of vertices fed back */
- uint max_vert_emit; /**< max number of verts we can emit */
- float *dest[PIPE_MAX_FEEDBACK_ATTRIBS]; /**< dests for vertex attribs */
-};
-
-
-
-/**
- * Check if there's space to store 'numVerts' in the feedback buffer(s).
- */
-static boolean
-check_space(const struct draw_stage *stage, uint numVerts)
-{
- const struct feedback_stage *fs = (struct feedback_stage *) stage;
- return fs->num_vert_emitted + numVerts <= fs->max_vert_emit;
-}
-
-
-/**
- * Record the given vertex's attributes into the feedback buffer(s).
- */
-static void
-feedback_vertex(struct draw_stage *stage, const struct vertex_header *vertex)
-{
- struct feedback_stage *fs = (struct feedback_stage *) stage;
- const struct pipe_feedback_state *feedback = &stage->draw->feedback;
- const uint select = feedback->interleaved ? 0 : 1;
- uint i;
-
- /*
- * Note: 'select' is either 0 or 1. By multiplying 'i' by 'select'
- * we can either address output buffer 0 (for interleaving) or
- * output buffer i (for non-interleaved).
- */
- for (i = 0; i < feedback->num_attribs; i++) {
- const uint slot = feedback->attrib[i];
- const float *src = slot ? vertex->data[slot] : vertex->clip;
- const uint size = feedback->size[i];
- float *dest = fs->dest[i * select];
-
- switch (size) {
- case 4:
- dest[3] = src[3];
- /* fall-through */
- case 3:
- dest[2] = src[2];
- /* fall-through */
- case 2:
- dest[1] = src[1];
- /* fall-through */
- case 1:
- dest[0] = src[0];
- /* fall-through */
- default:
- ;
- }
- fs->dest[i * select] += size;
- }
-
- fs->num_vert_emitted++;
-}
-
-
-static void feedback_begin( struct draw_stage *stage )
-{
- struct feedback_stage *fs = (struct feedback_stage *) stage;
- const struct pipe_feedback_state *feedback = &stage->draw->feedback;
-
- fs->num_prim_generated = 0;
- fs->num_prim_emitted = 0;
- fs->num_vert_emitted = 0;
-
- assert(feedback->enabled);
-
- /* Compute max_vert_emit, the max number of vertices we can emit.
- * And, setup dest[] pointers.
- */
- if (stage->draw->feedback.interleaved) {
- uint i, vertex_size = 0;
- /* compute size of each interleaved vertex, in floats */
- for (i = 0; i < feedback->num_attribs; i++) {
- vertex_size += feedback->size[i];
- }
- /* compute max number of vertices we can feedback */
- fs->max_vert_emit = stage->draw->user.feedback_buffer_size[0]
- / sizeof(float) / vertex_size;
-
- fs->dest[0] = (float *) stage->draw->user.feedback_buffer[0];
- }
- else {
- uint i;
- uint max = ~0;
- for (i = 0; i < feedback->num_attribs; i++) {
- uint n = stage->draw->user.feedback_buffer_size[i]
- / sizeof(float) / feedback->size[i];
- if (n < max)
- max = n;
- fs->dest[i] = (float *) stage->draw->user.feedback_buffer[i];
- }
- fs->max_vert_emit = max;
- }
-
- if (!feedback->discard)
- stage->next->begin( stage->next );
-}
-
-
-static void feedback_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct feedback_stage *fs = (struct feedback_stage *) stage;
-
- fs->num_prim_generated++;
-
- if (stage->draw->feedback.enabled && check_space(stage, 3)) {
- feedback_vertex(stage, header->v[0]);
- feedback_vertex(stage, header->v[1]);
- feedback_vertex(stage, header->v[2]);
- fs->num_prim_emitted++;
- }
-
- if (!stage->draw->feedback.discard)
- stage->next->tri( stage->next, header );
-}
-
-
-static void feedback_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct feedback_stage *fs = (struct feedback_stage *) stage;
-
- fs->num_prim_generated++;
-
- if (stage->draw->feedback.enabled && check_space(stage, 2)) {
- feedback_vertex(stage, header->v[0]);
- feedback_vertex(stage, header->v[1]);
- fs->num_prim_emitted++;
- }
-
- if (!stage->draw->feedback.discard)
- stage->next->line( stage->next, header );
-}
-
-
-static void feedback_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct feedback_stage *fs = (struct feedback_stage *) stage;
-
- fs->num_prim_generated++;
-
- if (stage->draw->feedback.enabled && check_space(stage, 1)) {
- feedback_vertex(stage, header->v[0]);
- fs->num_prim_emitted++;
- }
-
- if (!stage->draw->feedback.discard)
- stage->next->point( stage->next, header );
-}
-
-
-static void feedback_end( struct draw_stage *stage )
-{
-
- /* XXX Unmap the vertex feedback buffers so we can write to them */
-
-
- if (!stage->draw->feedback.discard)
- stage->next->end( stage->next );
-}
-
-
-
-static void feedback_reset_stipple_counter( struct draw_stage *stage )
-{
- if (!stage->draw->feedback.discard)
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void feedback_destroy( struct draw_stage *stage )
-{
- FREE( stage );
-}
-
-
-/**
- * Create feedback drawing stage.
- */
-struct draw_stage *draw_feedback_stage( struct draw_context *draw )
-{
- struct feedback_stage *feedback = CALLOC_STRUCT(feedback_stage);
-
- feedback->stage.draw = draw;
- feedback->stage.next = NULL;
- feedback->stage.begin = feedback_begin;
- feedback->stage.point = feedback_point;
- feedback->stage.line = feedback_line;
- feedback->stage.tri = feedback_tri;
- feedback->stage.end = feedback_end;
- feedback->stage.reset_stipple_counter = feedback_reset_stipple_counter;
- feedback->stage.destroy = feedback_destroy;
-
- return &feedback->stage;
-}
-
-
diff --git a/src/mesa/pipe/draw/draw_private.h b/src/mesa/pipe/draw/draw_private.h
index ca5ca7b3c9..5f89ac121e 100644
--- a/src/mesa/pipe/draw/draw_private.h
+++ b/src/mesa/pipe/draw/draw_private.h
@@ -159,7 +159,6 @@ struct draw_context
struct draw_stage *validate;
/* stages (in logical order) */
- struct draw_stage *feedback;
struct draw_stage *flatshade;
struct draw_stage *clip;
struct draw_stage *cull;
@@ -172,13 +171,10 @@ struct draw_context
/* pipe state that we need: */
const struct pipe_rasterizer_state *rasterizer;
- struct pipe_feedback_state feedback;
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX];
const struct draw_vertex_shader *vertex_shader;
- struct pipe_vertex_buffer feedback_buffer[PIPE_ATTRIB_MAX];
- struct pipe_vertex_element feedback_element[PIPE_ATTRIB_MAX];
/* user-space vertex data, buffers */
struct {
@@ -192,10 +188,6 @@ struct draw_context
/** constant buffer (for vertex shader) */
const void *constants;
-
- /** The vertex feedback buffer */
- void *feedback_buffer[PIPE_MAX_FEEDBACK_ATTRIBS];
- uint feedback_buffer_size[PIPE_MAX_FEEDBACK_ATTRIBS]; /* in bytes */
} user;
/* Clip derived state:
@@ -257,7 +249,6 @@ struct draw_context
-extern struct draw_stage *draw_feedback_stage( struct draw_context *context );
extern struct draw_stage *draw_unfilled_stage( struct draw_context *context );
extern struct draw_stage *draw_twoside_stage( struct draw_context *context );
extern struct draw_stage *draw_offset_stage( struct draw_context *context );
diff --git a/src/mesa/pipe/draw/draw_validate.c b/src/mesa/pipe/draw/draw_validate.c
index 8ce4a926e2..58cf340281 100644
--- a/src/mesa/pipe/draw/draw_validate.c
+++ b/src/mesa/pipe/draw/draw_validate.c
@@ -100,11 +100,6 @@ static void validate_begin( struct draw_stage *stage )
next = draw->pipeline.flatshade;
}
- if (draw->feedback.enabled || draw->feedback.discard) {
- draw->pipeline.feedback->next = next;
- next = draw->pipeline.feedback;
- }
-
draw->pipeline.first = next;
draw->pipeline.first->begin( draw->pipeline.first );
}
diff --git a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
index 29de437595..acd61163fa 100644
--- a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
+++ b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
@@ -38,7 +38,7 @@
#ifdef MESA_LLVM
#include "pipe/llvm/gallivm.h"
-#include "pipe/tgsi/exec/tgsi_core.h"
+#include "pipe/p_shader_tokens.h"
#define DBG 0
@@ -115,13 +115,12 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
unsigned i;
struct vertex_header *dests[VS_QUEUE_LENGTH];
- float inputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4];
- float outputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4];
+ float inputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4] ALIGN16_ATTRIB;
+ float outputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4] ALIGN16_ATTRIB;
float (*consts)[4] = (float (*)[4]) draw->user.constants;
struct gallivm_prog *prog = draw->vertex_shader->llvm_prog;
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
-
/* fetch the inputs */
for (i = 0; i < draw->vs.queue_nr; ++i) {
unsigned elt = draw->vs.queue[i].elt;
@@ -133,7 +132,8 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
gallivm_prog_exec(prog, inputs, outputs, consts,
draw->vs.queue_nr,
draw->vertex_shader->state->num_inputs,
- draw->vertex_info.num_attribs);
+ draw->vertex_info.num_attribs - 2);
+
/* store machine results */
for (int i = 0; i < draw->vs.queue_nr; ++i) {
@@ -158,7 +158,6 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
vOut->clipmask = compute_clipmask(vOut->clip, draw->plane, draw->nr_planes);
vOut->edgeflag = 1;
-
/* divide by w */
w = 1.0f / w;
x *= w;
diff --git a/src/mesa/pipe/failover/fo_context.h b/src/mesa/pipe/failover/fo_context.h
index 7cf18c9ec1..f5eaa0b5fa 100644
--- a/src/mesa/pipe/failover/fo_context.h
+++ b/src/mesa/pipe/failover/fo_context.h
@@ -79,12 +79,10 @@ struct failover_context {
const struct fo_state *vertex_shader;
struct pipe_blend_color blend_color;
- struct pipe_clear_color_state clear_color;
struct pipe_clip_state clip;
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
- uint sampler_units[PIPE_MAX_SAMPLERS];
struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
diff --git a/src/mesa/pipe/failover/fo_state.c b/src/mesa/pipe/failover/fo_state.c
index fd6137ba66..6b4f1517ac 100644
--- a/src/mesa/pipe/failover/fo_state.c
+++ b/src/mesa/pipe/failover/fo_state.c
@@ -146,16 +146,6 @@ failover_set_clip_state( struct pipe_context *pipe,
failover->hw->set_clip_state( failover->hw, clip );
}
-static void
-failover_set_clear_color_state( struct pipe_context *pipe,
- const struct pipe_clear_color_state *clear_color )
-{
- struct failover_context *failover = failover_context(pipe);
-
- failover->clear_color = *clear_color;
- failover->dirty |= FO_NEW_CLEAR_COLOR;
- failover->hw->set_clear_color_state( failover->hw, clear_color );
-}
static void *
failover_create_depth_stencil_state(struct pipe_context *pipe,
@@ -294,18 +284,6 @@ failover_set_polygon_stipple( struct pipe_context *pipe,
failover->hw->set_polygon_stipple( failover->hw, stipple );
}
-static void
-failover_set_sampler_units( struct pipe_context *pipe,
- uint num_samplers, const uint *units )
-{
- struct failover_context *failover = failover_context(pipe);
- uint i;
-
- for (i = 0; i < num_samplers; i++)
- failover->sampler_units[i] = units[i];
- failover->dirty |= FO_NEW_SAMPLER;
- failover->hw->set_sampler_units(failover->hw, num_samplers, units);
-}
static void *
failover_create_rasterizer_state(struct pipe_context *pipe,
@@ -400,16 +378,16 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler)
static void
-failover_set_texture_state(struct pipe_context *pipe,
- unsigned unit,
- struct pipe_texture *texture)
+failover_set_sampler_texture(struct pipe_context *pipe,
+ unsigned unit,
+ struct pipe_texture *texture)
{
struct failover_context *failover = failover_context(pipe);
failover->texture[unit] = texture;
failover->dirty |= FO_NEW_TEXTURE;
failover->dirty_texture |= (1<<unit);
- failover->hw->set_texture_state( failover->hw, unit, texture );
+ failover->hw->set_sampler_texture( failover->hw, unit, texture );
}
@@ -480,12 +458,10 @@ failover_init_state_functions( struct failover_context *failover )
failover->pipe.set_blend_color = failover_set_blend_color;
failover->pipe.set_clip_state = failover_set_clip_state;
- failover->pipe.set_clear_color_state = failover_set_clear_color_state;
failover->pipe.set_framebuffer_state = failover_set_framebuffer_state;
failover->pipe.set_polygon_stipple = failover_set_polygon_stipple;
- failover->pipe.set_sampler_units = failover_set_sampler_units;
failover->pipe.set_scissor_state = failover_set_scissor_state;
- failover->pipe.set_texture_state = failover_set_texture_state;
+ failover->pipe.set_sampler_texture = failover_set_sampler_texture;
failover->pipe.set_viewport_state = failover_set_viewport_state;
failover->pipe.set_vertex_buffer = failover_set_vertex_buffer;
failover->pipe.set_vertex_element = failover_set_vertex_element;
diff --git a/src/mesa/pipe/failover/fo_state_emit.c b/src/mesa/pipe/failover/fo_state_emit.c
index a3aff8abd2..c99ecd4f8d 100644
--- a/src/mesa/pipe/failover/fo_state_emit.c
+++ b/src/mesa/pipe/failover/fo_state_emit.c
@@ -69,9 +69,6 @@ failover_state_emit( struct failover_context *failover )
if (failover->dirty & FO_NEW_CLIP)
failover->sw->set_clip_state( failover->sw, &failover->clip );
- if (failover->dirty & FO_NEW_CLEAR_COLOR)
- failover->sw->set_clear_color_state( failover->sw, &failover->clear_color );
-
if (failover->dirty & FO_NEW_DEPTH_STENCIL)
failover->sw->bind_depth_stencil_state( failover->sw,
failover->depth_stencil->sw_state );
@@ -112,7 +109,7 @@ failover_state_emit( struct failover_context *failover )
if (failover->dirty & FO_NEW_TEXTURE) {
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
if (failover->dirty_texture & (1<<i)) {
- failover->sw->set_texture_state( failover->sw, i,
+ failover->sw->set_sampler_texture( failover->sw, i,
failover->texture[i] );
}
}
diff --git a/src/mesa/pipe/i915simple/i915_context.c b/src/mesa/pipe/i915simple/i915_context.c
index f505ff6ae6..a08cf5087e 100644
--- a/src/mesa/pipe/i915simple/i915_context.c
+++ b/src/mesa/pipe/i915simple/i915_context.c
@@ -170,21 +170,6 @@ static void i915_destroy( struct pipe_context *pipe )
-static void
-i915_begin_query(struct pipe_context *pipe, struct pipe_query_object *q)
-{
- /* should never be called */
- assert(0);
-}
-
-
-static void
-i915_end_query(struct pipe_context *pipe, struct pipe_query_object *q)
-{
- /* should never be called */
- assert(0);
-}
-
static boolean
i915_draw_elements( struct pipe_context *pipe,
@@ -223,18 +208,6 @@ i915_draw_elements( struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- /* Map feedback buffers if enabled */
- if (i915->feedback.enabled) {
- const uint n = i915->feedback.interleaved ? 1 : i915->feedback.num_attribs;
- for (i = 0; i < n; i++) {
- void *ptr = pipe->winsys->buffer_map(pipe->winsys,
- i915->feedback_buffer[i].buffer,
- PIPE_BUFFER_FLAG_WRITE);
- draw_set_mapped_feedback_buffer(draw, i, ptr,
- i915->feedback_buffer[i].size);
- }
- }
-
draw_set_mapped_constant_buffer(draw,
i915->current.constants[PIPE_SHADER_VERTEX]);
@@ -256,16 +229,6 @@ i915_draw_elements( struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- /* Unmap feedback buffers if enabled */
- if (i915->feedback.enabled) {
- const uint n = i915->feedback.interleaved ? 1 : i915->feedback.num_attribs;
- for (i = 0; i < n; i++) {
- pipe->winsys->buffer_unmap(pipe->winsys,
- i915->feedback_buffer[i].buffer);
- draw_set_mapped_feedback_buffer(draw, i, NULL, 0);
- }
- }
-
return TRUE;
}
@@ -320,8 +283,6 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys,
i915->pipe.clear = i915_clear;
- i915->pipe.begin_query = i915_begin_query;
- i915->pipe.end_query = i915_end_query;
i915->pipe.draw_arrays = i915_draw_arrays;
i915->pipe.draw_elements = i915_draw_elements;
diff --git a/src/mesa/pipe/i915simple/i915_context.h b/src/mesa/pipe/i915simple/i915_context.h
index dbf0c885cc..80df7f0fba 100644
--- a/src/mesa/pipe/i915simple/i915_context.h
+++ b/src/mesa/pipe/i915simple/i915_context.h
@@ -194,21 +194,15 @@ struct i915_context
const struct pipe_shader_state *fs;
struct pipe_blend_color blend_color;
- struct pipe_clear_color_state clear_color;
struct pipe_clip_state clip;
struct pipe_constant_buffer constants[PIPE_SHADER_TYPES];
- struct pipe_feedback_state feedback;
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
- uint sampler_units[PIPE_MAX_SAMPLERS];
struct i915_texture *texture[PIPE_MAX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
- /** Feedback buffers */
- struct pipe_feedback_buffer feedback_buffer[PIPE_MAX_FEEDBACK_ATTRIBS];
-
unsigned dirty;
unsigned *batch_start;
diff --git a/src/mesa/pipe/i915simple/i915_state.c b/src/mesa/pipe/i915simple/i915_state.c
index 038fd623ea..2a9a587a37 100644
--- a/src/mesa/pipe/i915simple/i915_state.c
+++ b/src/mesa/pipe/i915simple/i915_state.c
@@ -431,14 +431,6 @@ static void i915_set_polygon_stipple( struct pipe_context *pipe,
{
}
-static void i915_set_sampler_units(struct pipe_context *pipe,
- uint numSamplers, const uint *units)
-{
- struct i915_context *i915 = i915_context(pipe);
- uint i;
- for (i = 0; i < numSamplers; i++)
- i915->sampler_units[i] = units[i];
-}
static void * i915_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@@ -523,13 +515,13 @@ static void i915_set_constant_buffer(struct pipe_context *pipe,
}
-static void i915_set_texture_state(struct pipe_context *pipe,
- unsigned unit,
- struct pipe_texture *texture)
+static void i915_set_sampler_texture(struct pipe_context *pipe,
+ unsigned sampler,
+ struct pipe_texture *texture)
{
struct i915_context *i915 = i915_context(pipe);
- i915->texture[unit] = (struct i915_texture*)texture; /* ptr, not struct */
+ i915->texture[sampler] = (struct i915_texture*)texture; /* ptr, not struct */
i915->dirty |= I915_NEW_TEXTURE;
}
@@ -547,44 +539,6 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe,
}
-static void
-i915_set_feedback_state(struct pipe_context *pipe,
- const struct pipe_feedback_state *feedback)
-{
- struct i915_context *i915 = i915_context(pipe);
- i915->feedback = *feedback;
- draw_set_feedback_state(i915->draw, feedback);
-}
-
-
-static void
-i915_set_feedback_buffer(struct pipe_context *pipe,
- unsigned index,
- const struct pipe_feedback_buffer *feedback)
-{
- struct i915_context *i915 = i915_context(pipe);
-
- assert(index < PIPE_MAX_FEEDBACK_ATTRIBS);
-
- /* Need to be careful with referencing */
- pipe->winsys->buffer_reference(pipe->winsys,
- &i915->feedback_buffer[index].buffer,
- feedback->buffer);
- i915->feedback_buffer[index].size = feedback->size;
- i915->feedback_buffer[index].start_offset = feedback->start_offset;
-}
-
-
-
-static void i915_set_clear_color_state(struct pipe_context *pipe,
- const struct pipe_clear_color_state *clear)
-{
- struct i915_context *i915 = i915_context(pipe);
-
- i915->clear_color = *clear; /* struct copy */
-}
-
-
static void i915_set_clip_state( struct pipe_context *pipe,
const struct pipe_clip_state *clip )
@@ -748,16 +702,12 @@ i915_init_state_functions( struct i915_context *i915 )
i915->pipe.set_blend_color = i915_set_blend_color;
i915->pipe.set_clip_state = i915_set_clip_state;
- i915->pipe.set_clear_color_state = i915_set_clear_color_state;
i915->pipe.set_constant_buffer = i915_set_constant_buffer;
i915->pipe.set_framebuffer_state = i915_set_framebuffer_state;
- i915->pipe.set_feedback_state = i915_set_feedback_state;
- i915->pipe.set_feedback_buffer = i915_set_feedback_buffer;
i915->pipe.set_polygon_stipple = i915_set_polygon_stipple;
- i915->pipe.set_sampler_units = i915_set_sampler_units;
i915->pipe.set_scissor_state = i915_set_scissor_state;
- i915->pipe.set_texture_state = i915_set_texture_state;
+ i915->pipe.set_sampler_texture = i915_set_sampler_texture;
i915->pipe.set_viewport_state = i915_set_viewport_state;
i915->pipe.set_vertex_buffer = i915_set_vertex_buffer;
i915->pipe.set_vertex_element = i915_set_vertex_element;
diff --git a/src/mesa/pipe/i915simple/i915_surface.c b/src/mesa/pipe/i915simple/i915_surface.c
index bd6fd32704..f93a75b0f0 100644
--- a/src/mesa/pipe/i915simple/i915_surface.c
+++ b/src/mesa/pipe/i915simple/i915_surface.c
@@ -238,11 +238,12 @@ i915_get_tex_surface(struct pipe_context *pipe,
assert(zslice == 0);
}
- ps = pipe->winsys->surface_alloc(pipe->winsys, pt->format);
+ ps = pipe->winsys->surface_alloc(pipe->winsys);
if (ps) {
assert(ps->format);
assert(ps->refcount);
pipe->winsys->buffer_reference(pipe->winsys, &ps->buffer, tex->buffer);
+ ps->format = pt->format;
ps->cpp = pt->cpp;
ps->width = pt->width[level];
ps->height = pt->height[level];
diff --git a/src/mesa/pipe/i965simple/Makefile b/src/mesa/pipe/i965simple/Makefile
new file mode 100644
index 0000000000..460bc52c35
--- /dev/null
+++ b/src/mesa/pipe/i965simple/Makefile
@@ -0,0 +1,75 @@
+
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i965simple
+
+DRIVER_SOURCES = \
+ brw_blit.c \
+ brw_flush.c \
+ brw_strings.c \
+ brw_surface.c \
+ brw_cc.c \
+ brw_clip.c \
+ brw_clip_line.c \
+ brw_clip_point.c \
+ brw_clip_state.c \
+ brw_clip_tri.c \
+ brw_clip_util.c \
+ brw_context.c \
+ brw_curbe.c \
+ brw_draw.c \
+ brw_draw_upload.c \
+ brw_eu.c \
+ brw_eu_debug.c \
+ brw_eu_emit.c \
+ brw_eu_util.c \
+ brw_gs.c \
+ brw_gs_emit.c \
+ brw_gs_state.c \
+ brw_misc_state.c \
+ brw_program.c \
+ brw_regions.c \
+ brw_sf.c \
+ brw_sf_emit.c \
+ brw_sf_state.c \
+ brw_state.c \
+ brw_state_batch.c \
+ brw_state_cache.c \
+ brw_state_pool.c \
+ brw_state_upload.c \
+ brw_tex.c \
+ brw_tex_layout.c \
+ brw_urb.c \
+ brw_util.c \
+ brw_vs.c \
+ brw_vs_constval.c \
+ brw_vs_emit.c \
+ brw_vs_state.c \
+ brw_vtbl.c \
+ brw_wm.c \
+ brw_wm_debug.c \
+ brw_wm_emit.c \
+ brw_wm_fp.c \
+ brw_wm_iz.c \
+ brw_wm_glsl.c \
+ brw_wm_pass0.c \
+ brw_wm_pass1.c \
+ brw_wm_pass2.c \
+ brw_wm_sampler_state.c \
+ brw_wm_state.c \
+ brw_wm_surface_state.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(COMMON_BM_SOURCES) \
+ $(MINIGLX_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+DRIVER_DEFINES = -I.
+
+include ../Makefile.template
+
+symlinks:
diff --git a/src/mesa/pipe/i965simple/brw_batch.h b/src/mesa/pipe/i965simple/brw_batch.h
new file mode 100644
index 0000000000..7c778f360b
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_batch.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_BATCH_H
+#define BRW_BATCH_H
+
+#include "brw_winsys.h"
+
+#define BATCH_LOCALS
+
+#define INTEL_BATCH_NO_CLIPRECTS 0x1
+#define INTEL_BATCH_CLIPRECTS 0x2
+
+#define BEGIN_BATCH( dwords, relocs ) \
+ (brw->batch_start = brw->winsys->batch_start(brw->winsys, dwords, relocs))
+
+#define OUT_BATCH( dword ) \
+ brw->winsys->batch_dword(brw->winsys, dword)
+
+#define OUT_RELOC( buf, flags, delta ) \
+ brw->winsys->batch_reloc(brw->winsys, buf, flags, delta)
+
+#define ADVANCE_BATCH()
+
+/* XXX: this is bogus - need proper handling for out-of-memory in batchbuffer.
+ */
+#define FLUSH_BATCH(fence) do { \
+ brw->winsys->batch_flush(brw->winsys, fence); \
+ brw->batch_start = NULL; \
+ brw->hardware_dirty = ~0; \
+} while (0)
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_blit.c b/src/mesa/pipe/i965simple/brw_blit.c
new file mode 100644
index 0000000000..4692129e40
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_blit.c
@@ -0,0 +1,217 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <errno.h>
+
+#include "brw_batch.h"
+#include "brw_blit.h"
+#include "brw_context.h"
+#include "brw_reg.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_winsys.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+void brw_fill_blit(struct brw_context *brw,
+ unsigned cpp,
+ short dst_pitch,
+ struct pipe_buffer_handle *dst_buffer,
+ unsigned dst_offset,
+ boolean dst_tiled,
+ short x, short y,
+ short w, short h,
+ unsigned color)
+{
+ unsigned BR13, CMD;
+ BATCH_LOCALS;
+
+ dst_pitch *= cpp;
+
+ switch(cpp) {
+ case 1:
+ case 2:
+ case 3:
+ BR13 = (0xF0 << 16) | (1<<24);
+ CMD = XY_COLOR_BLT_CMD;
+ break;
+ case 4:
+ BR13 = (0xF0 << 16) | (1<<24) | (1<<25);
+ CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+ break;
+ default:
+ return;
+ }
+
+ if (dst_tiled) {
+ CMD |= XY_DST_TILED;
+ dst_pitch /= 4;
+ }
+
+ BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH( CMD );
+ OUT_BATCH( dst_pitch | BR13 );
+ OUT_BATCH( (y << 16) | x );
+ OUT_BATCH( ((y+h) << 16) | (x+w) );
+ OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset );
+ OUT_BATCH( color );
+ ADVANCE_BATCH();
+}
+
+static unsigned translate_raster_op(unsigned logicop)
+{
+ switch(logicop) {
+ case PIPE_LOGICOP_CLEAR: return 0x00;
+ case PIPE_LOGICOP_AND: return 0x88;
+ case PIPE_LOGICOP_AND_REVERSE: return 0x44;
+ case PIPE_LOGICOP_COPY: return 0xCC;
+ case PIPE_LOGICOP_AND_INVERTED: return 0x22;
+ case PIPE_LOGICOP_NOOP: return 0xAA;
+ case PIPE_LOGICOP_XOR: return 0x66;
+ case PIPE_LOGICOP_OR: return 0xEE;
+ case PIPE_LOGICOP_NOR: return 0x11;
+ case PIPE_LOGICOP_EQUIV: return 0x99;
+ case PIPE_LOGICOP_INVERT: return 0x55;
+ case PIPE_LOGICOP_OR_REVERSE: return 0xDD;
+ case PIPE_LOGICOP_COPY_INVERTED: return 0x33;
+ case PIPE_LOGICOP_OR_INVERTED: return 0xBB;
+ case PIPE_LOGICOP_NAND: return 0x77;
+ case PIPE_LOGICOP_SET: return 0xFF;
+ default: return 0;
+ }
+}
+
+
+/* Copy BitBlt
+ */
+void brw_copy_blit(struct brw_context *brw,
+ unsigned cpp,
+ short src_pitch,
+ struct pipe_buffer_handle *src_buffer,
+ unsigned src_offset,
+ boolean src_tiled,
+ short dst_pitch,
+ struct pipe_buffer_handle *dst_buffer,
+ unsigned dst_offset,
+ boolean dst_tiled,
+ short src_x, short src_y,
+ short dst_x, short dst_y,
+ short w, short h,
+ unsigned logic_op)
+{
+ unsigned CMD, BR13;
+ int dst_y2 = dst_y + h;
+ int dst_x2 = dst_x + w;
+ BATCH_LOCALS;
+
+
+ DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n",
+ __FUNCTION__,
+ src_buffer, src_pitch, src_x, src_y,
+ dst_buffer, dst_pitch, dst_x, dst_y,
+ w,h,logic_op);
+
+ assert( logic_op - PIPE_LOGICOP_CLEAR >= 0 );
+ assert( logic_op - PIPE_LOGICOP_CLEAR < 0x10 );
+
+ src_pitch *= cpp;
+ dst_pitch *= cpp;
+
+ switch(cpp) {
+ case 1:
+ case 2:
+ case 3:
+ BR13 = (translate_raster_op(logic_op) << 16) | (1<<24);
+ CMD = XY_SRC_COPY_BLT_CMD;
+ break;
+ case 4:
+ BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) |
+ (1<<25);
+ CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+ break;
+ default:
+ return;
+ }
+
+ if (src_tiled) {
+ CMD |= XY_SRC_TILED;
+ src_pitch /= 4;
+ }
+
+ if (dst_tiled) {
+ CMD |= XY_DST_TILED;
+ dst_pitch /= 4;
+ }
+
+ if (dst_y2 < dst_y ||
+ dst_x2 < dst_x) {
+ return;
+ }
+
+ dst_pitch &= 0xffff;
+ src_pitch &= 0xffff;
+
+ /* Initial y values don't seem to work with negative pitches. If
+ * we adjust the offsets manually (below), it seems to work fine.
+ *
+ * On the other hand, if we always adjust, the hardware doesn't
+ * know which blit directions to use, so overlapping copypixels get
+ * the wrong result.
+ */
+ if (dst_pitch > 0 && src_pitch > 0) {
+ BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH( CMD );
+ OUT_BATCH( dst_pitch | BR13 );
+ OUT_BATCH( (dst_y << 16) | dst_x );
+ OUT_BATCH( (dst_y2 << 16) | dst_x2 );
+ OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE,
+ dst_offset );
+ OUT_BATCH( (src_y << 16) | src_x );
+ OUT_BATCH( src_pitch );
+ OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ,
+ src_offset );
+ ADVANCE_BATCH();
+ }
+ else {
+ BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH( CMD );
+ OUT_BATCH( (dst_pitch & 0xffff) | BR13 );
+ OUT_BATCH( (0 << 16) | dst_x );
+ OUT_BATCH( (h << 16) | dst_x2 );
+ OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE,
+ dst_offset + dst_y * dst_pitch );
+ OUT_BATCH( (src_pitch & 0xffff) );
+ OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ,
+ src_offset + src_y * src_pitch );
+ ADVANCE_BATCH();
+ }
+}
+
+
+
diff --git a/src/mesa/pipe/i965simple/brw_blit.h b/src/mesa/pipe/i965simple/brw_blit.h
new file mode 100644
index 0000000000..371a135375
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_blit.h
@@ -0,0 +1,32 @@
+#ifndef BRW_BLIT_H
+#define BRW_BLIT_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_buffer_handle;
+struct brw_context;
+
+void brw_fill_blit(struct brw_context *intel,
+ unsigned cpp,
+ short dst_pitch,
+ struct pipe_buffer_handle *dst_buffer,
+ unsigned dst_offset,
+ boolean dst_tiled,
+ short x, short y,
+ short w, short h,
+ unsigned color);
+void brw_copy_blit(struct brw_context *intel,
+ unsigned cpp,
+ short src_pitch,
+ struct pipe_buffer_handle *src_buffer,
+ unsigned src_offset,
+ boolean src_tiled,
+ short dst_pitch,
+ struct pipe_buffer_handle *dst_buffer,
+ unsigned dst_offset,
+ boolean dst_tiled,
+ short src_x, short src_y,
+ short dst_x, short dst_y,
+ short w, short h,
+ unsigned logic_op);
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_cc.c b/src/mesa/pipe/i965simple/brw_cc.c
new file mode 100644
index 0000000000..fc7fdba53f
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_cc.c
@@ -0,0 +1,268 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+
+
+static int brw_translate_compare_func(int func)
+{
+ switch(func) {
+ case PIPE_FUNC_NEVER:
+ return BRW_COMPAREFUNCTION_NEVER;
+ case PIPE_FUNC_LESS:
+ return BRW_COMPAREFUNCTION_LESS;
+ case PIPE_FUNC_LEQUAL:
+ return BRW_COMPAREFUNCTION_LEQUAL;
+ case PIPE_FUNC_GREATER:
+ return BRW_COMPAREFUNCTION_GREATER;
+ case PIPE_FUNC_GEQUAL:
+ return BRW_COMPAREFUNCTION_GEQUAL;
+ case PIPE_FUNC_NOTEQUAL:
+ return BRW_COMPAREFUNCTION_NOTEQUAL;
+ case PIPE_FUNC_EQUAL:
+ return BRW_COMPAREFUNCTION_EQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return BRW_COMPAREFUNCTION_ALWAYS;
+ }
+
+ fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+ return BRW_COMPAREFUNCTION_ALWAYS;
+}
+
+static int brw_translate_stencil_op(int op)
+{
+ switch(op) {
+ case PIPE_STENCIL_OP_KEEP:
+ return BRW_STENCILOP_KEEP;
+ case PIPE_STENCIL_OP_ZERO:
+ return BRW_STENCILOP_ZERO;
+ case PIPE_STENCIL_OP_REPLACE:
+ return BRW_STENCILOP_REPLACE;
+ case PIPE_STENCIL_OP_INCR:
+ return BRW_STENCILOP_INCRSAT;
+ case PIPE_STENCIL_OP_DECR:
+ return BRW_STENCILOP_DECRSAT;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ return BRW_STENCILOP_INCR;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ return BRW_STENCILOP_DECR;
+ case PIPE_STENCIL_OP_INVERT:
+ return BRW_STENCILOP_INVERT;
+ default:
+ return BRW_STENCILOP_ZERO;
+ }
+}
+
+
+static int brw_translate_logic_op(int opcode)
+{
+ switch(opcode) {
+ case PIPE_LOGICOP_CLEAR:
+ return BRW_LOGICOPFUNCTION_CLEAR;
+ case PIPE_LOGICOP_AND:
+ return BRW_LOGICOPFUNCTION_AND;
+ case PIPE_LOGICOP_AND_REVERSE:
+ return BRW_LOGICOPFUNCTION_AND_REVERSE;
+ case PIPE_LOGICOP_COPY:
+ return BRW_LOGICOPFUNCTION_COPY;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return BRW_LOGICOPFUNCTION_COPY_INVERTED;
+ case PIPE_LOGICOP_AND_INVERTED:
+ return BRW_LOGICOPFUNCTION_AND_INVERTED;
+ case PIPE_LOGICOP_NOOP:
+ return BRW_LOGICOPFUNCTION_NOOP;
+ case PIPE_LOGICOP_XOR:
+ return BRW_LOGICOPFUNCTION_XOR;
+ case PIPE_LOGICOP_OR:
+ return BRW_LOGICOPFUNCTION_OR;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return BRW_LOGICOPFUNCTION_OR_INVERTED;
+ case PIPE_LOGICOP_NOR:
+ return BRW_LOGICOPFUNCTION_NOR;
+ case PIPE_LOGICOP_EQUIV:
+ return BRW_LOGICOPFUNCTION_EQUIV;
+ case PIPE_LOGICOP_INVERT:
+ return BRW_LOGICOPFUNCTION_INVERT;
+ case PIPE_LOGICOP_OR_REVERSE:
+ return BRW_LOGICOPFUNCTION_OR_REVERSE;
+ case PIPE_LOGICOP_NAND:
+ return BRW_LOGICOPFUNCTION_NAND;
+ case PIPE_LOGICOP_SET:
+ return BRW_LOGICOPFUNCTION_SET;
+ default:
+ return BRW_LOGICOPFUNCTION_SET;
+ }
+}
+
+
+static void upload_cc_vp( struct brw_context *brw )
+{
+ struct brw_cc_viewport ccv;
+
+ memset(&ccv, 0, sizeof(ccv));
+
+ ccv.min_depth = 0.0;
+ ccv.max_depth = 1.0;
+
+ brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv );
+}
+
+const struct brw_tracked_state brw_cc_vp = {
+ .dirty = {
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .update = upload_cc_vp
+};
+
+
+static void upload_cc_unit( struct brw_context *brw )
+{
+ struct brw_cc_unit_state cc;
+
+ memset(&cc, 0, sizeof(cc));
+
+ /* BRW_NEW_DEPTH_STENCIL */
+ if (brw->attribs.DepthStencil->stencil.front_enabled) {
+ cc.cc0.stencil_enable = brw->attribs.DepthStencil->stencil.front_enabled;
+ cc.cc0.stencil_func = brw_translate_compare_func(brw->attribs.DepthStencil->stencil.front_func);
+ cc.cc0.stencil_fail_op = brw_translate_stencil_op(brw->attribs.DepthStencil->stencil.front_fail_op);
+ cc.cc0.stencil_pass_depth_fail_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil.front_zfail_op);
+ cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil.front_zpass_op);
+ cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil.ref_value[0];
+ cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil.write_mask[0];
+ cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil.value_mask[0];
+
+ if (brw->attribs.DepthStencil->stencil.back_enabled) {
+ cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil.back_enabled;
+ cc.cc0.bf_stencil_func = brw_translate_compare_func(
+ brw->attribs.DepthStencil->stencil.back_func);
+ cc.cc0.bf_stencil_fail_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil.back_fail_op);
+ cc.cc0.bf_stencil_pass_depth_fail_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil.back_zfail_op);
+ cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil.back_zpass_op);
+ cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil.ref_value[1];
+ cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil.write_mask[1];
+ cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil.value_mask[1];
+ }
+
+ /* Not really sure about this:
+ */
+ if (brw->attribs.DepthStencil->stencil.write_mask[0] ||
+ (brw->attribs.DepthStencil->stencil.back_enabled &&
+ brw->attribs.DepthStencil->stencil.write_mask[1]))
+ cc.cc0.stencil_write_enable = 1;
+ }
+
+ /* BRW_NEW_BLEND */
+ if (brw->attribs.Blend->logicop_enable) {
+ cc.cc2.logicop_enable = 1;
+ cc.cc5.logicop_func = brw_translate_logic_op( brw->attribs.Blend->logicop_func );
+ }
+ else if (brw->attribs.Blend->blend_enable) {
+ int eqRGB = brw->attribs.Blend->rgb_func;
+ int eqA = brw->attribs.Blend->alpha_func;
+ int srcRGB = brw->attribs.Blend->rgb_src_factor;
+ int dstRGB = brw->attribs.Blend->rgb_dst_factor;
+ int srcA = brw->attribs.Blend->alpha_src_factor;
+ int dstA = brw->attribs.Blend->alpha_dst_factor;
+
+ if (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX) {
+ srcRGB = dstRGB = PIPE_BLENDFACTOR_ONE;
+ }
+
+ if (eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX) {
+ srcA = dstA = PIPE_BLENDFACTOR_ONE;
+ }
+
+ cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+ cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
+ cc.cc6.blend_function = brw_translate_blend_equation( eqRGB );
+
+ cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+ cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
+ cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA );
+
+ cc.cc3.blend_enable = 1;
+ cc.cc3.ia_blend_enable = (srcA != srcRGB ||
+ dstA != dstRGB ||
+ eqA != eqRGB);
+ }
+
+ /* BRW_NEW_ALPHATEST
+ */
+ if (brw->attribs.AlphaTest->enabled) {
+ cc.cc3.alpha_test = 1;
+ cc.cc3.alpha_test_func = brw_translate_compare_func(brw->attribs.AlphaTest->func);
+
+ UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], brw->attribs.AlphaTest->ref);
+
+ cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+ }
+
+ if (brw->attribs.Blend->dither) {
+ cc.cc5.dither_enable = 1;
+ cc.cc6.y_dither_offset = 0;
+ cc.cc6.x_dither_offset = 0;
+ }
+
+ if (brw->attribs.DepthStencil->depth.enabled) {
+ cc.cc2.depth_test = brw->attribs.DepthStencil->depth.enabled;
+ cc.cc2.depth_test_function = brw_translate_compare_func(brw->attribs.DepthStencil->depth.func);
+ cc.cc2.depth_write_enable = brw->attribs.DepthStencil->depth.writemask;
+ }
+
+ /* CACHE_NEW_CC_VP */
+ cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ cc.cc5.statistics_enable = 1;
+
+ brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc );
+}
+
+const struct brw_tracked_state brw_cc_unit = {
+ .dirty = {
+ .brw = BRW_NEW_DEPTH_STENCIL | BRW_NEW_BLEND | BRW_NEW_ALPHA_TEST,
+ .cache = CACHE_NEW_CC_VP
+ },
+ .update = upload_cc_unit
+};
+
diff --git a/src/mesa/pipe/i965simple/brw_clip.c b/src/mesa/pipe/i965simple/brw_clip.c
new file mode 100644
index 0000000000..fc86e0a446
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_clip.c
@@ -0,0 +1,201 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_clip.h"
+
+#define FRONT_UNFILLED_BIT 0x1
+#define BACK_UNFILLED_BIT 0x2
+
+
+static void compile_clip_prog( struct brw_context *brw,
+ struct brw_clip_prog_key *key )
+{
+ struct brw_clip_compile c;
+ const unsigned *program;
+ unsigned program_size;
+ unsigned delta;
+ unsigned i;
+
+ memset(&c, 0, sizeof(c));
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(&c.func);
+
+ c.func.single_program_flow = 1;
+
+ c.key = *key;
+
+
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.header_position_offset = ATTR_SIZE;
+
+ for (i = 0, delta = REG_SIZE; i < PIPE_MAX_SHADER_OUTPUTS; i++)
+ if (c.key.attrs & (1<<i)) {
+ c.offset[i] = delta;
+ delta += ATTR_SIZE;
+ }
+
+ c.nr_attrs = brw_count_bits(c.key.attrs);
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+ c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Would ideally have the option of producing a program which could
+ * do all three:
+ */
+ switch (key->primitive) {
+ case PIPE_PRIM_TRIANGLES:
+#if 0
+ if (key->do_unfilled)
+ brw_emit_unfilled_clip( &c );
+ else
+#endif
+ brw_emit_tri_clip( &c );
+ break;
+ case PIPE_PRIM_LINES:
+ brw_emit_line_clip( &c );
+ break;
+ case PIPE_PRIM_POINTS:
+ brw_emit_point_clip( &c );
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /* Upload
+ */
+ brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG],
+ &c.key,
+ sizeof(c.key),
+ program,
+ program_size,
+ &c.prog_data,
+ &brw->clip.prog_data );
+}
+
+
+static boolean search_cache( struct brw_context *brw,
+ struct brw_clip_prog_key *key )
+{
+ return brw_search_cache(&brw->cache[BRW_CLIP_PROG],
+ key, sizeof(*key),
+ &brw->clip.prog_data,
+ &brw->clip.prog_gs_offset);
+}
+
+
+
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_clip_prog(struct brw_context *brw)
+{
+ struct brw_clip_prog_key key;
+
+ memset(&key, 0, sizeof(key));
+
+ /* Populate the key:
+ */
+ /* BRW_NEW_REDUCED_PRIMITIVE */
+ key.primitive = brw->reduced_primitive;
+ /* CACHE_NEW_VS_PROG */
+ key.attrs = brw->vs.prog_data->outputs_written;
+ /* BRW_NEW_RASTER */
+ key.do_flat_shading = (brw->attribs.Raster->flatshade);
+ /* BRW_NEW_CLIP */
+ key.nr_userclip = brw->attribs.Clip.nr; /* XXX */
+ key.clip_mode = BRW_CLIPMODE_NORMAL;
+
+ if (key.primitive == PIPE_PRIM_TRIANGLES) {
+ if (brw->attribs.Raster->cull_mode == PIPE_WINDING_BOTH)
+ key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
+ else {
+ if (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL)
+ key.do_unfilled = 1;
+
+ /* Most cases the fixed function units will handle. Cases where
+ * one or more polygon faces are unfilled will require help:
+ */
+ if (key.do_unfilled) {
+ key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+
+ if (brw->attribs.Raster->offset_cw ||
+ brw->attribs.Raster->offset_ccw) {
+ key.offset_units = brw->attribs.Raster->offset_units;
+ key.offset_factor = brw->attribs.Raster->offset_scale;
+ }
+ key.fill_ccw = brw->attribs.Raster->fill_ccw;
+ key.fill_cw = brw->attribs.Raster->fill_cw;
+ key.offset_ccw = brw->attribs.Raster->offset_ccw;
+ key.offset_cw = brw->attribs.Raster->offset_cw;
+ if (brw->attribs.Raster->light_twoside &&
+ key.fill_cw != CLIP_CULL)
+ key.copy_bfc_cw = 1;
+ }
+ }
+ }
+
+ if (!search_cache(brw, &key))
+ compile_clip_prog( brw, &key );
+}
+
+const struct brw_tracked_state brw_clip_prog = {
+ .dirty = {
+ .brw = (BRW_NEW_RASTERIZER |
+ BRW_NEW_CLIP |
+ BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .update = upload_clip_prog
+};
diff --git a/src/mesa/pipe/i965simple/brw_clip.h b/src/mesa/pipe/i965simple/brw_clip.h
new file mode 100644
index 0000000000..a89d08b791
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_clip.h
@@ -0,0 +1,170 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)
+
+/* Note that if unfilled primitives are being emitted, we have to fix
+ * up polygon offset and flatshading at this point:
+ */
+struct brw_clip_prog_key {
+ unsigned attrs:32;
+ unsigned primitive:4;
+ unsigned nr_userclip:3;
+ unsigned do_flat_shading:1;
+ unsigned do_unfilled:1;
+ unsigned fill_cw:2; /* includes cull information */
+ unsigned fill_ccw:2; /* includes cull information */
+ unsigned offset_cw:1;
+ unsigned offset_ccw:1;
+ unsigned pad0:17;
+
+ unsigned copy_bfc_cw:1;
+ unsigned copy_bfc_ccw:1;
+ unsigned clip_mode:3;
+ unsigned pad1:27;
+
+ float offset_factor;
+ float offset_units;
+};
+
+
+#define CLIP_LINE 0
+#define CLIP_POINT 1
+#define CLIP_FILL 2
+#define CLIP_CULL 3
+
+
+#define PRIM_MASK (0x1f)
+
+struct brw_clip_compile {
+ struct brw_compile func;
+ struct brw_clip_prog_key key;
+ struct brw_clip_prog_data prog_data;
+
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_VERTS];
+
+ struct brw_reg t;
+ struct brw_reg t0, t1;
+ struct brw_reg dp0, dp1;
+
+ struct brw_reg dpPrev;
+ struct brw_reg dp;
+ struct brw_reg loopcount;
+ struct brw_reg nr_verts;
+ struct brw_reg planemask;
+
+ struct brw_reg inlist;
+ struct brw_reg outlist;
+ struct brw_reg freelist;
+
+ struct brw_reg dir;
+ struct brw_reg tmp0, tmp1;
+ struct brw_reg offset;
+
+ struct brw_reg fixed_planes;
+ struct brw_reg plane_equation;
+ } reg;
+
+ /* 3 different ways of expressing vertex size:
+ */
+ unsigned nr_attrs;
+ unsigned nr_regs;
+ unsigned nr_bytes;
+
+ unsigned first_tmp;
+ unsigned last_tmp;
+
+ boolean need_direction;
+
+ unsigned last_mrf;
+
+ unsigned header_position_offset;
+ unsigned offset[PIPE_ATTRIB_MAX];
+};
+
+#define ATTR_SIZE (4*4)
+
+/* Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );
+void brw_emit_point_clip( struct brw_clip_compile *c );
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ unsigned nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ boolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ boolean allocate,
+ boolean eot,
+ unsigned header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ unsigned to, unsigned from );
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_clip_line.c b/src/mesa/pipe/i965simple/brw_clip_line.c
new file mode 100644
index 0000000000..75d9e5fcda
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_clip_line.c
@@ -0,0 +1,245 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+ unsigned i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2;
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < 4; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0);
+ c->reg.t0 = brw_vec1_grf(i, 1);
+ c->reg.t1 = brw_vec1_grf(i, 2);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp1 = brw_vec1_grf(i, 4);
+ i++;
+
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+
+ c->first_tmp = i;
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+
+/* Line clipping, more or less following the following algorithm:
+ *
+ * for (p=0;p<MAX_PLANES;p++) {
+ * if (clipmask & (1 << p)) {
+ * float dp0 = DOTPROD( vtx0, plane[p] );
+ * float dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ * if (IS_NEGATIVE(dp1)) {
+ * float t = dp1 / (dp1 - dp0);
+ * if (t > t1) t1 = t;
+ * } else {
+ * float t = dp0 / (dp0 - dp1);
+ * if (t > t0) t0 = t;
+ * }
+ *
+ * if (t0 + t1 >= 1.0)
+ * return;
+ * }
+ * }
+ *
+ * interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ * interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx0 = brw_indirect(0, 0);
+ struct brw_indirect vtx1 = brw_indirect(1, 0);
+ struct brw_indirect newvtx0 = brw_indirect(2, 0);
+ struct brw_indirect newvtx1 = brw_indirect(3, 0);
+ struct brw_indirect plane_ptr = brw_indirect(4, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *is_negative;
+ struct brw_instruction *is_neg2;
+ struct brw_instruction *not_culled;
+ struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+
+ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3]));
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+ /* Note: init t0, t1 together:
+ */
+ brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+ brw_clip_init_planes(c);
+ brw_clip_init_clipmask(c);
+
+ /* -ve rhw workaround */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+#if 0
+ /* dp = DP4(vtx->position, plane)
+ */
+ brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+
+ /* if (IS_NEGATIVE(dp1))
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+#else
+ #warning "disabled"
+#endif
+ is_negative = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+ brw_MOV(p, c->reg.t1, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ is_negative = brw_ELSE(p, is_negative);
+ {
+ /* Coming back in. We know that both cannot be negative
+ * because the line would have been culled in that case.
+ */
+
+ /* If both are positive, do nothing */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+ brw_MOV(p, c->reg.t0, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ brw_ENDIF(p, is_neg2);
+ }
+ brw_ENDIF(p, is_negative);
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* while (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+
+ brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+ not_culled = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE);
+ brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE);
+
+ brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, not_culled);
+ brw_clip_kill_thread(c);
+}
+
+
+
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+ brw_clip_line_alloc_regs(c);
+
+ if (c->key.do_flat_shading)
+ brw_clip_copy_colors(c, 0, 1);
+
+ clip_and_emit_line(c);
+}
diff --git a/src/mesa/pipe/i965simple/brw_clip_point.c b/src/mesa/pipe/i965simple/brw_clip_point.c
new file mode 100644
index 0000000000..6fce7210d1
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_clip_point.c
@@ -0,0 +1,47 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Point clipping, nothing to do?
+ */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_tri_alloc_regs(c, 0);
+ brw_clip_kill_thread(c);
+}
diff --git a/src/mesa/pipe/i965simple/brw_clip_state.c b/src/mesa/pipe/i965simple/brw_clip_state.c
new file mode 100644
index 0000000000..51a4666a0b
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_clip_state.c
@@ -0,0 +1,92 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+
+static void upload_clip_unit( struct brw_context *brw )
+{
+ struct brw_clip_unit_state clip;
+
+ memset(&clip, 0, sizeof(clip));
+
+ /* CACHE_NEW_CLIP_PROG */
+ clip.thread0.grf_reg_count =
+ ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1;
+ clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6;
+ clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+ clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length;
+ clip.clip5.clip_mode = brw->clip.prog_data->clip_mode;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+
+ /* BRW_NEW_URB_FENCE */
+ clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries;
+ clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+ clip.thread4.max_threads = 1; /* 2 threads */
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ clip.thread4.stats_enable = 1;
+
+ /* CONSTANT */
+ clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ clip.thread1.single_program_flow = 1;
+ clip.thread3.dispatch_grf_start_reg = 1;
+ clip.thread3.urb_entry_read_offset = 0;
+ clip.clip5.userclip_enable_flags = 0x7f;
+ clip.clip5.userclip_must_clip = 1;
+ clip.clip5.guard_band_enable = 0;
+ clip.clip5.viewport_z_clip_enable = 1;
+ clip.clip5.viewport_xy_clip_enable = 1;
+ clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+ clip.clip5.api_mode = BRW_CLIP_API_OGL;
+ clip.clip6.clipper_viewport_state_ptr = 0;
+ clip.viewport_xmin = -1;
+ clip.viewport_xmax = 1;
+ clip.viewport_ymin = -1;
+ clip.viewport_ymax = 1;
+
+ brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip );
+}
+
+
+const struct brw_tracked_state brw_clip_unit = {
+ .dirty = {
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_CLIP_PROG
+ },
+ .update = upload_clip_unit
+};
diff --git a/src/mesa/pipe/i965simple/brw_clip_tri.c b/src/mesa/pipe/i965simple/brw_clip_tri.c
new file mode 100644
index 0000000000..c5da7b825e
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_clip_tri.c
@@ -0,0 +1,566 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+static struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+ struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmps( struct brw_clip_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ unsigned nr_verts )
+{
+ unsigned i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2;
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ if (c->nr_attrs & 1) {
+ for (j = 0; j < 3; j++) {
+ unsigned delta = c->nr_attrs*16 + 32;
+ brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+ }
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0);
+ c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD);
+ c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp = brw_vec1_grf(i, 4);
+ i++;
+
+ c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+ if (c->key.do_unfilled) {
+ c->reg.dir = brw_vec4_grf(i, 0);
+ c->reg.offset = brw_vec4_grf(i, 4);
+ i++;
+ c->reg.tmp0 = brw_vec4_grf(i, 0);
+ c->reg.tmp1 = brw_vec4_grf(i, 4);
+ i++;
+ }
+
+ c->first_tmp = i;
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+ struct brw_instruction *is_rev;
+
+ /* Initial list of indices for incoming vertexes:
+ */
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+ /* XXX: Is there an easier way to do this? Need to reverse every
+ * second tristrip element: Can ignore sometimes?
+ */
+ is_rev = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(-1));
+ }
+ is_rev = brw_ELSE(p, is_rev);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(1));
+ }
+ brw_ENDIF(p, is_rev);
+
+ brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) );
+ brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+
+
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_copy_colors(c, 1, 0);
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ is_poly = brw_ELSE(p, is_poly);
+ {
+ brw_clip_copy_colors(c, 0, 2);
+ brw_clip_copy_colors(c, 1, 2);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx = brw_indirect(0, 0);
+ struct brw_indirect vtxPrev = brw_indirect(1, 0);
+ struct brw_indirect vtxOut = brw_indirect(2, 0);
+ struct brw_indirect plane_ptr = brw_indirect(3, 0);
+ struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+ struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+ struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *vertex_loop;
+ struct brw_instruction *next_test;
+ struct brw_instruction *prev_test;
+
+ brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) );
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+ brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* vtxOut = freelist_ptr++
+ */
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) );
+ brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+ vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* vtx = *input_ptr;
+ */
+ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+ /* IS_NEGATIVE(prev) */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ prev_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* IS_POSITIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+
+ /* Coming back in.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+ /* If (vtxOut == 0) vtxOut = vtxPrev
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, FALSE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+
+ }
+ prev_test = brw_ELSE(p, prev_test);
+ {
+ /* *outlist_ptr++ = vtxPrev;
+ * nr_verts++;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+ /* IS_NEGATIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* Going out of bounds. Avoid division by zero as we
+ * know dp != dpPrev from DIFFERENT_SIGNS, above.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
+
+ /* If (vtxOut == 0) vtxOut = vtx
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, TRUE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+ }
+ brw_ENDIF(p, prev_test);
+
+ /* vtxPrev = vtx;
+ * inlist_ptr++;
+ */
+ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+ brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+ /* while (--loopcount != 0)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, vertex_loop);
+
+ /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1]
+ * inlist = outlist
+ * inlist_ptr = &inlist[0]
+ * outlist_ptr = &outlist[0]
+ */
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
+ brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+ brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* nr_verts >= 3
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ c->reg.nr_verts,
+ brw_imm_ud(3));
+
+ /* && (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+#else
+ #warning "disabled"
+#endif
+}
+
+
+
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop, *if_insn;
+
+ /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p,
+ c->reg.loopcount,
+ c->reg.nr_verts,
+ brw_imm_d(-2));
+
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect vptr = brw_indirect(1, 0);
+
+ brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+
+ brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+static void do_clip_tri( struct brw_clip_compile *c )
+{
+ brw_clip_init_planes(c);
+
+ brw_clip_tri(c);
+}
+
+
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ do_clip_tri(c);
+ }
+ brw_ENDIF(p, do_clip);
+}
+
+static void brw_clip_test( struct brw_clip_compile *c )
+{
+#if 0
+ struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+ struct brw_reg v0 = get_tmp(c);
+ struct brw_reg v1 = get_tmp(c);
+ struct brw_reg v2 = get_tmp(c);
+
+ struct brw_indirect vt0 = brw_indirect(0, 0);
+ struct brw_indirect vt1 = brw_indirect(1, 0);
+ struct brw_indirect vt2 = brw_indirect(2, 0);
+
+ struct brw_compile *p = &c->func;
+
+ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
+ brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
+ brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
+
+ /* test nearz, xmin, ymin plane */
+ brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* test farz, xmax, ymax plane */
+ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ release_tmps(c);
+#else
+ #warning "disabled"
+#endif
+}
+
+
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{
+ struct brw_instruction *neg_rhw;
+ struct brw_compile *p = &c->func;
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_clipmask(c);
+
+ /* if -ve rhw workaround bit is set,
+ do cliptest */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ neg_rhw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_test(c);
+ }
+ brw_ENDIF(p, neg_rhw);
+
+ /* Can't push into do_clip_tri because with polygon (or quad)
+ * flatshading, need to apply the flatshade here because we don't
+ * respect the PV when converting to trifan for emit:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ if (c->key.clip_mode == BRW_CLIPMODE_NORMAL)
+ do_clip_tri(c);
+ else
+ maybe_do_clip_tri(c);
+
+ brw_clip_tri_emit_polygon(c);
+
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_kill_thread(c);
+}
diff --git a/src/mesa/pipe/i965simple/brw_clip_unfilled.c b/src/mesa/pipe/i965simple/brw_clip_unfilled.c
new file mode 100644
index 0000000000..b774a76dd6
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_clip_unfilled.c
@@ -0,0 +1,477 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+/* This is performed against the original triangles, so no indirection
+ * required:
+BZZZT!
+ */
+static void compute_tri_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg e = c->reg.tmp0;
+ struct brw_reg f = c->reg.tmp1;
+ struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]);
+ struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]);
+ struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]);
+
+
+ /* Calculate the vectors of two edges of the triangle:
+ */
+ brw_ADD(p, e, v0, negate(v2));
+ brw_ADD(p, f, v1, negate(v2));
+
+ /* Take their crossproduct:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3));
+ brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
+}
+
+
+static void cull_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ unsigned conditional;
+
+ assert (!(c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL));
+
+ if (c->key.fill_ccw == CLIP_CULL)
+ conditional = BRW_CONDITIONAL_GE;
+ else
+ conditional = BRW_CONDITIONAL_L;
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+
+static void copy_bfc( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ unsigned conditional;
+
+ /* Do we have any colors to copy?
+ */
+ if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
+ !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
+ return;
+
+ /* In some wierd degnerate cases we can end up testing the
+ * direction twice, once for culling and once for bfc copying. Oh
+ * well, that's what you get for setting wierd GL state.
+ */
+ if (c->key.copy_bfc_ccw)
+ conditional = BRW_CONDITIONAL_GE;
+ else
+ conditional = BRW_CONDITIONAL_L;
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ unsigned i;
+
+ for (i = 0; i < 3; i++) {
+ if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));
+
+ if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
+ }
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+
+
+/*
+ float iz = 1.0 / dir.z;
+ float ac = dir.x * iz;
+ float bc = dir.y * iz;
+ offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
+ offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+ offset *= MRD;
+*/
+static void compute_offset( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg off = c->reg.offset;
+ struct brw_reg dir = c->reg.dir;
+
+ brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
+ brw_MUL(p, vec2(off), dir, get_element(off, 2));
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ brw_abs(get_element(off, 0)),
+ brw_abs(get_element(off, 1)));
+
+ brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
+ brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
+}
+
+
+static void merge_edgeflags( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ /* Get away with using reg.vertex because we know that this is not
+ * a _3DPRIM_TRISTRIP_REVERSE:
+ */
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
+ brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
+ brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+static void apply_one_offset( struct brw_clip_compile *c,
+ struct brw_indirect vert )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]);
+ struct brw_reg z = get_element(pos, 2);
+
+ brw_ADD(p, z, z, vec1(c->reg.offset));
+}
+
+
+
+/***********************************************************************
+ * Output clipped polygon as an unfilled primitive:
+ */
+static void emit_lines(struct brw_clip_compile *c,
+ boolean do_offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_edge;
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v1 = brw_indirect(1, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+ struct brw_indirect v1ptr = brw_indirect(3, 0);
+
+ /* Need a seperate loop for offset:
+ */
+ if (do_offset) {
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ apply_one_offset(c, v0);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+ }
+
+ /* v1ptr = &inlist[nr_verts]
+ * *v1ptr = v0
+ */
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw edge if edgeflag != 0 */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+ brw_imm_f(0));
+ draw_edge = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_edge);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+static void emit_points(struct brw_clip_compile *c,
+ boolean do_offset )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_point;
+
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw if edgeflag != 0
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+ brw_imm_f(0));
+ draw_point = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (do_offset)
+ apply_one_offset(c, v0);
+
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_point);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+
+
+
+
+static void emit_primitives( struct brw_clip_compile *c,
+ unsigned mode,
+ boolean do_offset )
+{
+ switch (mode) {
+ case CLIP_FILL:
+ brw_clip_tri_emit_polygon(c);
+ break;
+
+ case CLIP_LINE:
+ emit_lines(c, do_offset);
+ break;
+
+ case CLIP_POINT:
+ emit_points(c, do_offset);
+ break;
+
+ case CLIP_CULL:
+ assert(0);
+ break;
+ }
+}
+
+
+
+static void emit_unfilled_primitives( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+
+ /* Direction culling has already been done.
+ */
+ if (c->key.fill_ccw != c->key.fill_cw &&
+ c->key.fill_ccw != CLIP_CULL &&
+ c->key.fill_cw != CLIP_CULL)
+ {
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+ }
+ ccw = brw_ELSE(p, ccw);
+ {
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+ }
+ brw_ENDIF(p, ccw);
+ }
+ else if (c->key.fill_cw != CLIP_CULL) {
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+ }
+ else if (c->key.fill_ccw != CLIP_CULL) {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+ }
+}
+
+
+
+
+static void check_nr_verts( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+
+void brw_emit_unfilled_clip( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+
+ c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
+ (c->key.fill_ccw != c->key.fill_cw) ||
+ c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL ||
+ c->key.copy_bfc_cw ||
+ c->key.copy_bfc_ccw);
+
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+ brw_clip_tri_init_vertices(c);
+
+ assert(c->offset[VERT_RESULT_EDGE]);
+
+ if (c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL) {
+ brw_clip_kill_thread(c);
+ return;
+ }
+
+ merge_edgeflags(c);
+
+ /* Need to use the inlist indirection here:
+ */
+ if (c->need_direction)
+ compute_tri_direction(c);
+
+ if (c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL)
+ cull_direction(c);
+
+ if (c->key.offset_ccw ||
+ c->key.offset_cw)
+ compute_offset(c);
+
+ if (c->key.copy_bfc_ccw ||
+ c->key.copy_bfc_cw)
+ copy_bfc(c);
+
+ /* Need to do this whether we clip or not:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ brw_clip_init_clipmask(c);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_init_planes(c);
+ brw_clip_tri(c);
+ check_nr_verts(c);
+ }
+ brw_ENDIF(p, do_clip);
+
+ emit_unfilled_primitives(c);
+ brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/src/mesa/pipe/i965simple/brw_clip_util.c b/src/mesa/pipe/i965simple/brw_clip_util.c
new file mode 100644
index 0000000000..6d58ceafff
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_clip_util.c
@@ -0,0 +1,351 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+
+static struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+ struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+
+static struct brw_reg make_plane_ud(unsigned x, unsigned y, unsigned z, unsigned w)
+{
+ return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x);
+}
+
+
+void brw_clip_init_planes( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+
+ if (!c->key.nr_userclip) {
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1));
+ }
+}
+
+
+
+#define W 3
+
+/* Project 'pos' to screen space (or back again), overwrite with results:
+ */
+static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+{
+ struct brw_compile *p = &c->func;
+
+ /* calc rhw
+ */
+ brw_math_invert(p, get_element(pos, W), get_element(pos, W));
+
+ /* value.xyz *= value.rhw
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, brw_writemask(pos, TGSI_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+}
+
+
+static void brw_clip_project_vertex( struct brw_clip_compile *c,
+ struct brw_indirect vert_addr )
+{
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+
+ /* Fixup position. Extract from the original vertex and re-project
+ * to screen space:
+ */
+ brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS]));
+ brw_clip_project_position(c, tmp);
+ brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
+
+ release_tmp(c, tmp);
+#else
+ #warning "disabled"
+#endif
+}
+
+
+
+
+/* Interpolate between two vertices and put the result into a0.0.
+ * Increment a0.0 accordingly.
+ */
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ boolean force_edgeflag)
+{
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+ unsigned i;
+
+ /* Just copy the vertex header:
+ */
+ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
+
+ /* Iterate over each attribute (could be done in pairs?)
+ */
+ for (i = 0; i < c->nr_attrs; i++) {
+ unsigned delta = i*16 + 32;
+
+ if (delta == c->offset[VERT_RESULT_EDGE]) {
+ if (force_edgeflag)
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
+ else
+ brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
+ }
+ else {
+ /* Interpolate:
+ *
+ * New = attr0 + t*attr1 - t*attr0
+ */
+ brw_MUL(p,
+ vec4(brw_null_reg()),
+ deref_4f(v1_ptr, delta),
+ t0);
+
+ brw_MAC(p,
+ tmp,
+ negate(deref_4f(v0_ptr, delta)),
+ t0);
+
+ brw_ADD(p,
+ deref_4f(dest_ptr, delta),
+ deref_4f(v0_ptr, delta),
+ tmp);
+ }
+ }
+
+ if (i & 1) {
+ unsigned delta = i*16 + 32;
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
+ }
+
+ release_tmp(c, tmp);
+
+ /* Recreate the projected (NDC) coordinate in the new vertex
+ * header:
+ */
+ brw_clip_project_vertex(c, dest_ptr );
+#else
+ #warning "disabled"
+#endif
+}
+
+
+
+
+#define MAX_MRF 16
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ boolean allocate,
+ boolean eot,
+ unsigned header)
+{
+ struct brw_compile *p = &c->func;
+ unsigned start = c->last_mrf;
+
+ assert(!(allocate && eot));
+
+ /* Cycle through mrf regs - probably futile as we have to wait for
+ * the allocation response anyway. Also, the order this function
+ * is invoked doesn't correspond to the order the instructions will
+ * be executed, so it won't have any effect in many cases.
+ */
+#if 0
+ if (start + c->nr_regs + 1 >= MAX_MRF)
+ start = 0;
+
+ c->last_mrf = start + c->nr_regs + 1;
+#endif
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+
+ /* Send each vertex as a seperate write to the urb. This
+ * is different to the concept in brw_sf_emit.c, where
+ * subsequent writes are used to build up a single urb
+ * entry. Each of these writes instantiates a seperate
+ * urb entry - (I think... what about 'allocate'?)
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ start,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response_length */
+ eot, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+void brw_clip_kill_thread(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+
+ /* Send an empty message to kill the thread and release any
+ * allocated urb entry:
+ */
+ brw_urb_WRITE(p,
+ retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ 0,
+ c->reg.R0,
+ 0, /* allocate */
+ 0, /* used */
+ 0, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ 0,
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
+{
+ return brw_address(c->reg.fixed_planes);
+}
+
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
+{
+ if (c->key.nr_userclip) {
+ return brw_imm_uw(16);
+ }
+ else {
+ return brw_imm_uw(4);
+ }
+}
+
+
+/* If flatshading, distribute color from provoking vertex prior to
+ * clipping.
+ */
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ unsigned to, unsigned from )
+{
+#if 0
+ struct brw_compile *p = &c->func;
+
+ if (c->offset[VERT_RESULT_COL0])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));
+
+ if (c->offset[VERT_RESULT_COL1])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));
+
+ if (c->offset[VERT_RESULT_BFC0])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));
+
+ if (c->offset[VERT_RESULT_BFC1])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
+#else
+ #warning "disabled"
+#endif
+}
+
+
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
+
+ /* Shift so that lowest outcode bit is rightmost:
+ */
+ brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
+
+ if (c->key.nr_userclip) {
+ struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
+
+ /* Rearrange userclip outcodes so that they come directly after
+ * the fixed plane bits.
+ */
+ brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
+ brw_SHR(p, tmp, tmp, brw_imm_ud(8));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
+
+ release_tmp(c, tmp);
+ }
+}
+
diff --git a/src/mesa/pipe/i965simple/brw_context.c b/src/mesa/pipe/i965simple/brw_context.c
new file mode 100644
index 0000000000..13bed97ff2
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_context.c
@@ -0,0 +1,244 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_vs.h"
+#include "brw_tex_layout.h"
+#include "brw_winsys.h"
+
+#include "pipe/p_winsys.h"
+#include "pipe/p_context.h"
+#include "pipe/p_util.h"
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+#ifndef BRW_DEBUG
+int BRW_DEBUG = (0);
+#endif
+
+static void brw_destroy(struct pipe_context *pipe)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ FREE(brw);
+}
+
+static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ int x, y, w, h;
+ /* FIXME: corny... */
+
+ x = 0;
+ y = 0;
+ w = ps->width;
+ h = ps->height;
+
+ pipe->surface_fill(pipe, ps, x, y, w, h, clearValue);
+}
+
+
+static int
+brw_get_param(struct pipe_context *pipe, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 8;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 11; /* max 1024x1024 */
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 8; /* max 128x128x128 */
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 11; /* max 1024x1024 */
+ default:
+ return 0;
+ }
+}
+
+
+static float
+brw_get_paramf(struct pipe_context *pipe, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 7.5;
+
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 255.0;
+
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 4.0;
+
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0;
+
+ default:
+ return 0;
+ }
+}
+
+static boolean
+brw_is_format_supported( struct pipe_context *pipe,
+ uint format )
+{
+#if 0
+ /* XXX: This is broken -- rewrite if still needed. */
+ static const unsigned tex_supported[] = {
+ PIPE_FORMAT_U_R8_G8_B8_A8,
+ PIPE_FORMAT_U_A8_R8_G8_B8,
+ PIPE_FORMAT_U_R5_G6_B5,
+ PIPE_FORMAT_U_L8,
+ PIPE_FORMAT_U_A8,
+ PIPE_FORMAT_U_I8,
+ PIPE_FORMAT_U_L8_A8,
+ PIPE_FORMAT_YCBCR,
+ PIPE_FORMAT_YCBCR_REV,
+ PIPE_FORMAT_S8_Z24,
+ };
+
+
+ /* Actually a lot more than this - add later:
+ */
+ static const unsigned render_supported[] = {
+ PIPE_FORMAT_U_A8_R8_G8_B8,
+ PIPE_FORMAT_U_R5_G6_B5,
+ };
+
+ /*
+ */
+ static const unsigned z_stencil_supported[] = {
+ PIPE_FORMAT_U_Z16,
+ PIPE_FORMAT_U_Z32,
+ PIPE_FORMAT_S8_Z24,
+ };
+
+ switch (type) {
+ case PIPE_RENDER_FORMAT:
+ *numFormats = Elements(render_supported);
+ return render_supported;
+
+ case PIPE_TEX_FORMAT:
+ *numFormats = Elements(tex_supported);
+ return render_supported;
+
+ case PIPE_Z_STENCIL_FORMAT:
+ *numFormats = Elements(render_supported);
+ return render_supported;
+
+ default:
+ *numFormats = 0;
+ return NULL;
+ }
+#else
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ return TRUE;
+ };
+ return FALSE;
+#endif
+}
+
+
+
+
+struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys,
+ struct brw_winsys *brw_winsys,
+ unsigned pci_id)
+{
+ struct brw_context *brw;
+
+ pipe_winsys->printf(pipe_winsys,
+ "%s: creating brw_context with pci id 0x%x\n",
+ __FUNCTION__, pci_id);
+
+ brw = CALLOC_STRUCT(brw_context);
+ if (brw == NULL)
+ return NULL;
+
+ brw->winsys = brw_winsys;
+ brw->pipe.winsys = pipe_winsys;
+
+ brw->pipe.destroy = brw_destroy;
+ brw->pipe.is_format_supported = brw_is_format_supported;
+ brw->pipe.get_param = brw_get_param;
+ brw->pipe.get_paramf = brw_get_paramf;
+ brw->pipe.clear = brw_clear;
+ brw->pipe.texture_create = brw_texture_create;
+ brw->pipe.texture_release = brw_texture_release;
+
+ brw_init_surface_functions(brw);
+ brw_init_state_functions(brw);
+ brw_init_flush_functions(brw);
+ brw_init_string_functions(brw);
+ brw_init_draw_functions( brw );
+
+
+ brw_init_state( brw );
+
+ brw->pci_id = pci_id;
+ brw->dirty = ~0;
+ brw->hardware_dirty = ~0;
+ brw->batch_start = NULL;
+
+ memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind));
+
+ return &brw->pipe;
+}
+
diff --git a/src/mesa/pipe/i965simple/brw_context.h b/src/mesa/pipe/i965simple/brw_context.h
new file mode 100644
index 0000000000..53f66cd6a9
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_context.h
@@ -0,0 +1,698 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "brw_structs.h"
+#include "brw_winsys.h"
+
+
+/* Glossary:
+ *
+ * URB - uniform resource buffer. A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry. An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawining a thread.
+ *
+ * VUE - vertex URB entry. An urb entry holding a vertex and usually
+ * a vertex header. The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.
+ *
+ * PUE - primitive URB entry. An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file. One of several register files
+ * addressable by programmed threads. The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned. The registers are individually 8 dwords wide and suitable
+ * for general usage. Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file. Threads communicate (and terminate)
+ * by sending messages. Message parameters are placed in contigous
+ * MRF registers. All program output is via these messages. URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0. Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware. Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer. Notional first unit, little software
+ * interaction. Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs. If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader. This corresponds to a new DX10 concept. If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units. The GS thread may perform arbitary
+ * computation and emit whatever primtives with whatever vertices it
+ * chooses. This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more. Additionally, GS is used to translate away primitives not
+ * handled by latter units, including Quads and Lineloops.
+ *
+ * CS - Clipper. Mesa's clipping algorithms are imported to run on
+ * this unit. The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes descisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization. Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower. Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here. SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator. No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+#define BRW_FALLBACK_TEXTURE 0x1
+#define BRW_MAX_CURBE (32*16)
+
+struct brw_context;
+struct brw_winsys;
+
+
+/* Raised when we receive new state across the pipe interface:
+ */
+#define BRW_NEW_VIEWPORT 0x1
+#define BRW_NEW_RASTERIZER 0x2
+#define BRW_NEW_FS 0x4
+#define BRW_NEW_BLEND 0x8
+#define BRW_NEW_CLIP 0x10
+#define BRW_NEW_SCISSOR 0x20
+#define BRW_NEW_STIPPLE 0x40
+#define BRW_NEW_FRAMEBUFFER 0x80
+#define BRW_NEW_ALPHA_TEST 0x100
+#define BRW_NEW_DEPTH_STENCIL 0x200
+#define BRW_NEW_SAMPLER 0x400
+#define BRW_NEW_TEXTURE 0x800
+#define BRW_NEW_CONSTANTS 0x1000
+#define BRW_NEW_VBO 0x2000
+#define BRW_NEW_VS 0x4000
+
+/* Raised for other internal events:
+ */
+#define BRW_NEW_URB_FENCE 0x10000
+#define BRW_NEW_INPUT_DIMENSIONS 0x20000
+#define BRW_NEW_CURBE_OFFSETS 0x40000
+#define BRW_NEW_REDUCED_PRIMITIVE 0x80000
+#define BRW_NEW_PRIMITIVE 0x100000
+#define BRW_NEW_CONTEXT 0x200000
+#define BRW_NEW_WM_INPUT_DIMENSIONS 0x400000
+#define BRW_NEW_INPUT_VARYING 0x800000
+#define BRW_NEW_PSP 0x1000000
+
+#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1))
+extern int BRW_DEBUG;
+
+#define DEBUG_TEXTURE 0x1
+#define DEBUG_STATE 0x2
+#define DEBUG_IOCTL 0x4
+#define DEBUG_PRIMS 0x8
+#define DEBUG_VERTS 0x10
+#define DEBUG_FALLBACKS 0x20
+#define DEBUG_VERBOSE 0x40
+#define DEBUG_DRI 0x80
+#define DEBUG_DMA 0x100
+#define DEBUG_SANITY 0x200
+#define DEBUG_SYNC 0x400
+#define DEBUG_SLEEP 0x800
+#define DEBUG_PIXEL 0x1000
+#define DEBUG_STATS 0x2000
+#define DEBUG_TILE 0x4000
+#define DEBUG_SINGLE_THREAD 0x8000
+#define DEBUG_WM 0x10000
+#define DEBUG_URB 0x20000
+#define DEBUG_VS 0x40000
+#define DEBUG_BATCH 0x80000
+#define DEBUG_BUFMGR 0x100000
+#define DEBUG_BLIT 0x200000
+#define DEBUG_REGION 0x400000
+#define DEBUG_MIPTREE 0x800000
+
+#define DBG(...) do { \
+ if (BRW_DEBUG & FILE_DEBUG_FLAG) \
+ brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \
+} while(0)
+
+#define PRINT(...) do { \
+ brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \
+} while(0)
+
+struct brw_state_flags {
+ unsigned cache;
+ unsigned brw;
+};
+
+struct brw_vertex_program {
+ struct pipe_shader_state program;
+ unsigned id;
+ unsigned param_state; /* flags indicating state tracked by params */
+};
+
+
+
+struct brw_fragment_program {
+ struct pipe_shader_state program;
+ unsigned id;
+ unsigned param_state; /* flags indicating state tracked by params */
+ boolean UsesKill;
+ boolean ComputesDepth;
+};
+
+
+struct brw_texture {
+ struct pipe_texture base;
+
+ /* Derived from the above:
+ */
+ unsigned pitch;
+ unsigned depth_pitch; /* per-image on i945? */
+ unsigned total_height;
+
+ unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* Explicitly store the offset of each image for each cube face or
+ * depth value. Pretty much have to accept that hardware formats
+ * are going to be so diverse that there is no unified way to
+ * compute the offsets of depth/cube images within a mipmap level,
+ * so have to store them as a lookup table:
+ */
+ unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */
+
+ /* Includes image offset tables:
+ */
+ unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* The data is held here:
+ */
+ struct pipe_buffer_handle *buffer;
+};
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+struct brw_wm_prog_data {
+ unsigned curb_read_length;
+ unsigned urb_read_length;
+
+ unsigned first_curbe_grf;
+ unsigned total_grf;
+ unsigned total_scratch;
+
+ unsigned nr_params;
+ boolean error;
+
+ /* Pointer to tracked values (only valid once
+ * _mesa_load_state_parameters has been called at runtime).
+ */
+ const float *param[BRW_MAX_CURBE];
+};
+
+struct brw_sf_prog_data {
+ unsigned urb_read_length;
+ unsigned total_grf;
+
+ /* Each vertex may have upto 12 attributes, 4 components each,
+ * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11
+ * rows.
+ *
+ * Actually we use 4 for each, so call it 12 rows.
+ */
+ unsigned urb_entry_size;
+};
+
+struct brw_clip_prog_data {
+ unsigned curb_read_length; /* user planes? */
+ unsigned clip_mode;
+ unsigned urb_read_length;
+ unsigned total_grf;
+};
+
+struct brw_gs_prog_data {
+ unsigned urb_read_length;
+ unsigned total_grf;
+};
+
+struct brw_vs_prog_data {
+ unsigned curb_read_length;
+ unsigned urb_read_length;
+ unsigned total_grf;
+ unsigned outputs_written;
+
+ unsigned inputs_read;
+
+ /* Used for calculating urb partitions:
+ */
+ unsigned urb_entry_size;
+};
+
+
+/* Size == 0 if output either not written, or always [0,0,0,1]
+ */
+struct brw_vs_ouput_sizes {
+ ubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
+};
+
+
+#define BRW_MAX_TEX_UNIT 8
+#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1
+
+/* Create a fixed sized struct for caching binding tables:
+ */
+struct brw_surface_binding_table {
+ unsigned surf_ss_offset[BRW_WM_MAX_SURF];
+};
+
+
+struct brw_cache;
+
+struct brw_mem_pool {
+ struct pipe_buffer_handle *buffer;
+
+ unsigned size;
+ unsigned offset; /* offset of first free byte */
+
+ struct brw_context *brw;
+};
+
+struct brw_cache_item {
+ unsigned hash;
+ unsigned key_size; /* for variable-sized keys */
+ const void *key;
+
+ unsigned offset; /* offset within pool's buffer */
+ unsigned data_size;
+
+ struct brw_cache_item *next;
+};
+
+
+
+struct brw_cache {
+ unsigned id;
+
+ const char *name;
+
+ struct brw_context *brw;
+ struct brw_mem_pool *pool;
+
+ struct brw_cache_item **items;
+ unsigned size, n_items;
+
+ unsigned key_size; /* for fixed-size keys */
+ unsigned aux_size;
+
+ unsigned last_addr; /* offset of active item */
+};
+
+
+
+
+/* Considered adding a member to this struct to document which flags
+ * an update might raise so that ordering of the state atoms can be
+ * checked or derived at runtime. Dropped the idea in favor of having
+ * a debug mode where the state is monitored for flags which are
+ * raised that have already been tested against.
+ */
+struct brw_tracked_state {
+ struct brw_state_flags dirty;
+ void (*update)( struct brw_context *brw );
+ void (*emit_reloc)( struct brw_context *brw );
+ boolean always_update;
+};
+
+
+/* Flags for brw->state.cache.
+ */
+#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
+
+
+
+
+enum brw_mempool_id {
+ BRW_GS_POOL,
+ BRW_SS_POOL,
+ BRW_MAX_POOL
+};
+
+
+struct brw_cached_batch_item {
+ struct header *header;
+ unsigned sz;
+ struct brw_cached_batch_item *next;
+};
+
+
+
+/* Protect against a future where PIPE_ATTRIB_MAX > 32. Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32)
+
+struct brw_vertex_element {
+ struct brw_vertex_element_state vep;
+
+ unsigned index;
+ unsigned element_size;
+ unsigned count;
+ unsigned vbo_rebase_offset;
+};
+
+
+
+struct brw_vertex_info {
+ unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */
+ unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */
+};
+
+
+
+
+
+struct brw_context
+{
+ struct pipe_context pipe;
+ struct brw_winsys *winsys;
+
+ unsigned primitive;
+ unsigned reduced_primitive;
+
+ boolean emit_state_always;
+
+ struct {
+ struct brw_state_flags dirty;
+ struct brw_tracked_state **atoms;
+ unsigned nr_atoms;
+ } state;
+
+
+ struct {
+ const struct pipe_alpha_test_state *AlphaTest;
+ const struct pipe_blend_state *Blend;
+ const struct pipe_depth_stencil_state *DepthStencil;
+ const struct pipe_poly_stipple *PolygonStipple;
+ const struct pipe_rasterizer_state *Raster;
+ const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS];
+ const struct brw_vertex_program *VertexProgram;
+ const struct brw_fragment_program *FragmentProgram;
+
+ struct pipe_clip_state Clip;
+ struct pipe_blend_color BlendColor;
+ struct pipe_scissor_state Scissor;
+ struct pipe_viewport_state Viewport;
+ struct pipe_framebuffer_state FrameBuffer;
+
+ const struct pipe_constant_buffer *Constants[2];
+ const struct brw_texture *Texture[PIPE_MAX_SAMPLERS];
+ } attribs;
+
+ struct brw_mem_pool pool[BRW_MAX_POOL];
+ struct brw_cache cache[BRW_MAX_CACHE];
+ struct brw_cached_batch_item *cached_batch_items;
+
+ struct {
+
+ /* Arrays with buffer objects to copy non-bufferobj arrays into
+ * for upload:
+ */
+ struct pipe_vertex_buffer vbo_array[PIPE_ATTRIB_MAX];
+
+ struct brw_vertex_element inputs[PIPE_ATTRIB_MAX];
+
+#define BRW_NR_UPLOAD_BUFS 17
+#define BRW_UPLOAD_INIT_SIZE (128*1024)
+
+ struct {
+ struct pipe_buffer_handle *vbo[BRW_NR_UPLOAD_BUFS];
+ unsigned buf;
+ unsigned offset;
+ unsigned size;
+ unsigned wrap;
+ } upload;
+
+ /* Summary of size and varying of active arrays, so we can check
+ * for changes to this state:
+ */
+ struct brw_vertex_info info;
+ int last_vb;
+ } vb;
+
+
+ unsigned *batch_start;
+ unsigned hardware_dirty;
+ unsigned dirty;
+ unsigned pci_id;
+
+ /* BRW_NEW_URB_ALLOCATIONS:
+ */
+ struct {
+ unsigned vsize; /* vertex size plus header in urb registers */
+ unsigned csize; /* constant buffer size in urb registers */
+ unsigned sfsize; /* setup data size in urb registers */
+
+ boolean constrained;
+
+ unsigned nr_vs_entries;
+ unsigned nr_gs_entries;
+ unsigned nr_clip_entries;
+ unsigned nr_sf_entries;
+ unsigned nr_cs_entries;
+
+/* unsigned vs_size; */
+/* unsigned gs_size; */
+/* unsigned clip_size; */
+/* unsigned sf_size; */
+/* unsigned cs_size; */
+
+ unsigned vs_start;
+ unsigned gs_start;
+ unsigned clip_start;
+ unsigned sf_start;
+ unsigned cs_start;
+ } urb;
+
+
+ /* BRW_NEW_CURBE_OFFSETS:
+ */
+ struct {
+ unsigned wm_start;
+ unsigned wm_size;
+ unsigned clip_start;
+ unsigned clip_size;
+ unsigned vs_start;
+ unsigned vs_size;
+ unsigned total_size;
+
+ /* Dynamic tracker which changes to reflect the state referenced
+ * by active fp and vp program parameters:
+ */
+ struct brw_tracked_state tracked_state;
+
+ unsigned gs_offset;
+
+ float *last_buf;
+ unsigned last_bufsz;
+ } curbe;
+
+ struct {
+ struct brw_vs_prog_data *prog_data;
+
+ unsigned prog_gs_offset;
+ unsigned state_gs_offset;
+ } vs;
+
+ struct {
+ struct brw_gs_prog_data *prog_data;
+
+ boolean prog_active;
+ unsigned prog_gs_offset;
+ unsigned state_gs_offset;
+ } gs;
+
+ struct {
+ struct brw_clip_prog_data *prog_data;
+
+ unsigned prog_gs_offset;
+ unsigned vp_gs_offset;
+ unsigned state_gs_offset;
+ } clip;
+
+
+ struct {
+ struct brw_sf_prog_data *prog_data;
+
+ unsigned prog_gs_offset;
+ unsigned vp_gs_offset;
+ unsigned state_gs_offset;
+ } sf;
+
+ struct {
+ struct brw_wm_prog_data *prog_data;
+ struct brw_wm_compile *compile_data;
+
+ /* Input sizes, calculated from active vertex program:
+ */
+ unsigned input_size_masks[4];
+
+
+ /**
+ * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER
+ * cache
+ */
+ struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+
+ unsigned render_surf;
+ unsigned nr_surfaces;
+
+ unsigned max_threads;
+ struct pipe_buffer_handle *scratch_buffer;
+ unsigned scratch_buffer_size;
+
+ unsigned sampler_count;
+ unsigned sampler_gs_offset;
+
+ struct brw_surface_binding_table bind;
+ unsigned bind_ss_offset;
+
+ unsigned prog_gs_offset;
+ unsigned state_gs_offset;
+ } wm;
+
+
+ struct {
+ unsigned vp_gs_offset;
+ unsigned state_gs_offset;
+ } cc;
+
+
+ /* Used to give every program string a unique id
+ */
+ unsigned program_id;
+};
+
+
+#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a)
+
+
+/*======================================================================
+ * brw_vtbl.c
+ */
+void brw_do_flush( struct brw_context *brw,
+ unsigned flags );
+
+
+/*======================================================================
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+
+/*======================================================================
+ * brw_tex.c
+ */
+void brwUpdateTextureState( struct brw_context *brw );
+void brw_FrameBufferTexInit( struct brw_context *brw );
+void brw_FrameBufferTexDestroy( struct brw_context *brw );
+
+
+/* brw_urb.c
+ */
+void brw_upload_urb_fence(struct brw_context *brw);
+
+void brw_upload_constant_buffer_state(struct brw_context *brw);
+void brw_upload_polygon_stipple(struct brw_context *brw);
+void brw_upload_line_stipple(struct brw_context *brw);
+
+void brw_init_surface_functions(struct brw_context *brw);
+void brw_init_state_functions(struct brw_context *brw);
+void brw_init_flush_functions(struct brw_context *brw);
+void brw_init_string_functions(struct brw_context *brw);
+
+/*======================================================================
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static inline struct brw_context *
+brw_context( struct pipe_context *ctx )
+{
+ return (struct brw_context *)ctx;
+}
+
+#endif
+
diff --git a/src/mesa/pipe/i965simple/brw_curbe.c b/src/mesa/pipe/i965simple/brw_curbe.c
new file mode 100644
index 0000000000..0894e82d56
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_curbe.c
@@ -0,0 +1,369 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_util.h"
+
+#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
+
+/* Partition the CURBE between the various users of constant values:
+ */
+static void calculate_curbe_offsets( struct brw_context *brw )
+{
+ /* CACHE_NEW_WM_PROG */
+ unsigned nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->attribs.VertexProgram;
+ unsigned nr_vp_regs = (vp->program.num_inputs * 4 + 15) / 16;
+ unsigned nr_clip_regs = 0;
+ unsigned total_regs;
+
+#if 0
+ /* BRW_NEW_CLIP ? */
+ if (brw->attribs.Transform->ClipPlanesEnabled) {
+ unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
+ nr_clip_regs = (nr_planes * 4 + 15) / 16;
+ }
+#endif
+
+
+ total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
+
+ /* This can happen - what to do? Probably rather than falling
+ * back, the best thing to do is emit programs which code the
+ * constants as immediate values. Could do this either as a static
+ * cap on WM and VS, or adaptively.
+ *
+ * Unfortunately, this is currently dependent on the results of the
+ * program generation process (in the case of wm), so this would
+ * introduce the need to re-generate programs in the event of a
+ * curbe allocation failure.
+ */
+ /* Max size is 32 - just large enough to
+ * hold the 128 parameters allowed by
+ * the fragment and vertex program
+ * api's. It's not clear what happens
+ * when both VP and FP want to use 128
+ * parameters, though.
+ */
+ assert(total_regs <= 32);
+
+ /* Lazy resize:
+ */
+ if (nr_fp_regs > brw->curbe.wm_size ||
+ nr_vp_regs > brw->curbe.vs_size ||
+ nr_clip_regs != brw->curbe.clip_size ||
+ (total_regs < brw->curbe.total_size / 4 &&
+ brw->curbe.total_size > 16)) {
+
+ unsigned reg = 0;
+
+ /* Calculate a new layout:
+ */
+ reg = 0;
+ brw->curbe.wm_start = reg;
+ brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
+ brw->curbe.clip_start = reg;
+ brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
+ brw->curbe.vs_start = reg;
+ brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
+ brw->curbe.total_size = reg;
+
+#if 0
+ if (0)
+ DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+ brw->curbe.wm_start,
+ brw->curbe.wm_size,
+ brw->curbe.clip_start,
+ brw->curbe.clip_size,
+ brw->curbe.vs_start,
+ brw->curbe.vs_size );
+#endif
+
+ brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
+ }
+}
+
+
+const struct brw_tracked_state brw_curbe_offsets = {
+ .dirty = {
+ .brw = (BRW_NEW_CLIP |
+ BRW_NEW_VS),
+ .cache = CACHE_NEW_WM_PROG
+ },
+ .update = calculate_curbe_offsets
+};
+
+
+
+/* Define the number of curbes within CS's urb allocation. Multiple
+ * urb entries -> multiple curbes. These will be used by
+ * fixed-function hardware in a double-buffering scheme to avoid a
+ * pipeline stall each time the contents of the curbe is changed.
+ */
+void brw_upload_constant_buffer_state(struct brw_context *brw)
+{
+ struct brw_constant_buffer_state cbs;
+ memset(&cbs, 0, sizeof(cbs));
+
+ /* It appears that this is the state packet for the CS unit, ie. the
+ * urb entries detailed here are housed in the CS range from the
+ * URB_FENCE command.
+ */
+ cbs.header.opcode = CMD_CONST_BUFFER_STATE;
+ cbs.header.length = sizeof(cbs)/4 - 2;
+
+ /* BRW_NEW_URB_FENCE */
+ cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+ cbs.bits0.urb_entry_size = brw->urb.csize - 1;
+
+ assert(brw->urb.nr_cs_entries);
+ BRW_CACHED_BATCH_STRUCT(brw, &cbs);
+}
+
+const struct brw_tracked_state brw_constant_buffer_state = {
+ .dirty = {
+ .brw = BRW_NEW_URB_FENCE,
+ .cache = 0
+ },
+ .update = brw_upload_constant_buffer_state
+};
+
+
+static float fixed_plane[6][4] = {
+ { 0, 0, -1, 1 },
+ { 0, 0, 1, 1 },
+ { 0, -1, 0, 1 },
+ { 0, 1, 0, 1 },
+ {-1, 0, 0, 1 },
+ { 1, 0, 0, 1 }
+};
+
+#if 0
+/* Upload a new set of constants. Too much variability to go into the
+ * cache mechanism, but maybe would benefit from a comparison against
+ * the current uploaded set of constants.
+ */
+static void upload_constant_buffer(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
+ struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
+ struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
+ unsigned sz = brw->curbe.total_size;
+ unsigned bufsz = sz * 16 * sizeof(float);
+ float *buf;
+ unsigned i;
+
+ /* Update our own dependency flags. This works because this
+ * function will also be called whenever fp or vp changes.
+ */
+ brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
+ brw->curbe.tracked_state.dirty.mesa |= vp->param_state;
+ brw->curbe.tracked_state.dirty.mesa |= fp->param_state;
+
+ if (sz == 0) {
+ struct brw_constant_buffer cb;
+ cb.header.opcode = CMD_CONST_BUFFER;
+ cb.header.length = sizeof(cb)/4 - 2;
+ cb.header.valid = 0;
+ cb.bits0.buffer_length = 0;
+ cb.bits0.buffer_address = 0;
+ BRW_BATCH_STRUCT(brw, &cb);
+
+ if (brw->curbe.last_buf) {
+ free(brw->curbe.last_buf);
+ brw->curbe.last_buf = NULL;
+ brw->curbe.last_bufsz = 0;
+ }
+
+ return;
+ }
+
+ buf = (float *)malloc(bufsz);
+
+ memset(buf, 0, bufsz);
+
+ if (brw->curbe.wm_size) {
+ unsigned offset = brw->curbe.wm_start * 16;
+
+ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+ for (i = 0; i < brw->wm.prog_data->nr_params; i++)
+ buf[offset + i] = brw->wm.prog_data->param[i][0];
+ }
+
+
+ /* The clipplanes are actually delivered to both CLIP and VS units.
+ * VS uses them to calculate the outcode bitmasks.
+ */
+ if (brw->curbe.clip_size) {
+ unsigned offset = brw->curbe.clip_start * 16;
+ unsigned j;
+
+ /* If any planes are going this way, send them all this way:
+ */
+ for (i = 0; i < 6; i++) {
+ buf[offset + i * 4 + 0] = fixed_plane[i][0];
+ buf[offset + i * 4 + 1] = fixed_plane[i][1];
+ buf[offset + i * 4 + 2] = fixed_plane[i][2];
+ buf[offset + i * 4 + 3] = fixed_plane[i][3];
+ }
+
+ /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
+ * clip-space:
+ */
+ assert(MAX_CLIP_PLANES == 6);
+ for (j = 0; j < MAX_CLIP_PLANES; j++) {
+ if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) {
+ buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0];
+ buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1];
+ buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2];
+ buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3];
+ i++;
+ }
+ }
+ }
+
+
+ if (brw->curbe.vs_size) {
+ unsigned offset = brw->curbe.vs_start * 16;
+ unsigned nr = vp->program.Base.Parameters->NumParameters;
+
+ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+ for (i = 0; i < nr; i++) {
+ buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0];
+ buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1];
+ buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2];
+ buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3];
+ }
+ }
+
+ if (0) {
+ for (i = 0; i < sz*16; i+=4)
+ _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+ buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+ _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+ brw->curbe.last_buf, buf,
+ bufsz, brw->curbe.last_bufsz,
+ brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+ }
+
+ if (brw->curbe.last_buf &&
+ bufsz == brw->curbe.last_bufsz &&
+ memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+ free(buf);
+/* return; */
+ }
+ else {
+ if (brw->curbe.last_buf)
+ free(brw->curbe.last_buf);
+ brw->curbe.last_buf = buf;
+ brw->curbe.last_bufsz = bufsz;
+
+
+ if (!brw_pool_alloc(pool,
+ bufsz,
+ 1 << 6,
+ &brw->curbe.gs_offset)) {
+ _mesa_printf("out of GS memory for curbe\n");
+ assert(0);
+ return;
+ }
+
+
+ /* Copy data to the buffer:
+ */
+ dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf);
+ }
+
+ /* TODO: only emit the constant_buffer packet when necessary, ie:
+ - contents have changed
+ - offset has changed
+ - hw requirements due to other packets emitted.
+ */
+ {
+ struct brw_constant_buffer cb;
+
+ memset(&cb, 0, sizeof(cb));
+
+ cb.header.opcode = CMD_CONST_BUFFER;
+ cb.header.length = sizeof(cb)/4 - 2;
+ cb.header.valid = 1;
+ cb.bits0.buffer_length = sz - 1;
+ cb.bits0.buffer_address = brw->curbe.gs_offset >> 6;
+
+ /* Because this provokes an action (ie copy the constants into the
+ * URB), it shouldn't be shortcircuited if identical to the
+ * previous time - because eg. the urb destination may have
+ * changed, or the urb contents different to last time.
+ *
+ * Note that the data referred to is actually copied internally,
+ * not just used in place according to passed pointer.
+ *
+ * It appears that the CS unit takes care of using each available
+ * URB entry (Const URB Entry == CURBE) in turn, and issuing
+ * flushes as necessary when doublebuffering of CURBEs isn't
+ * possible.
+ */
+/* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */
+ BRW_BATCH_STRUCT(brw, &cb);
+/* intel_batchbuffer_align(brw->intel.batch, 64, 0); */
+ }
+}
+
+/* This tracked state is unique in that the state it monitors varies
+ * dynamically depending on the parameters tracked by the fragment and
+ * vertex programs. This is the template used as a starting point,
+ * each context will maintain a copy of this internally and update as
+ * required.
+ */
+const struct brw_tracked_state brw_constant_buffer = {
+ .dirty = {
+ .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_CURBE_OFFSETS),
+ .cache = (CACHE_NEW_WM_PROG)
+ },
+ .update = upload_constant_buffer
+};
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_defines.h b/src/mesa/pipe/i965simple/brw_defines.h
new file mode 100644
index 0000000000..9379a397f6
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_defines.h
@@ -0,0 +1,852 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/*
+ */
+#define MI_NOOP 0x00
+#define MI_USER_INTERRUPT 0x02
+#define MI_WAIT_FOR_EVENT 0x03
+#define MI_FLUSH 0x04
+#define MI_REPORT_HEAD 0x07
+#define MI_ARB_ON_OFF 0x08
+#define MI_BATCH_BUFFER_END 0x0A
+#define MI_OVERLAY_FLIP 0x11
+#define MI_LOAD_SCAN_LINES_INCL 0x12
+#define MI_LOAD_SCAN_LINES_EXCL 0x13
+#define MI_DISPLAY_BUFFER_INFO 0x14
+#define MI_SET_CONTEXT 0x18
+#define MI_STORE_DATA_IMM 0x20
+#define MI_STORE_DATA_INDEX 0x21
+#define MI_LOAD_REGISTER_IMM 0x22
+#define MI_STORE_REGISTER_MEM 0x24
+#define MI_BATCH_BUFFER_START 0x31
+
+#define MI_SYNCHRONOUS_FLIP 0x0
+#define MI_ASYNCHRONOUS_FLIP 0x1
+
+#define MI_BUFFER_SECURE 0x0
+#define MI_BUFFER_NONSECURE 0x1
+
+#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0
+#define MI_ARBITRATE_BETWEEN_INSTS 0x1
+#define MI_NO_ARBITRATION 0x3
+
+#define MI_CONDITION_CODE_WAIT_DISABLED 0x0
+#define MI_CONDITION_CODE_WAIT_0 0x1
+#define MI_CONDITION_CODE_WAIT_1 0x2
+#define MI_CONDITION_CODE_WAIT_2 0x3
+#define MI_CONDITION_CODE_WAIT_3 0x4
+#define MI_CONDITION_CODE_WAIT_4 0x5
+
+#define MI_DISPLAY_PIPE_A 0x0
+#define MI_DISPLAY_PIPE_B 0x1
+
+#define MI_DISPLAY_PLANE_A 0x0
+#define MI_DISPLAY_PLANE_B 0x1
+#define MI_DISPLAY_PLANE_C 0x2
+
+#define MI_STANDARD_FLIP 0x0
+#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1
+#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2
+#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3
+
+#define MI_PHYSICAL_ADDRESS 0x0
+#define MI_VIRTUAL_ADDRESS 0x1
+
+#define MI_BUFFER_MEMORY_MAIN 0x0
+#define MI_BUFFER_MEMORY_GTT 0x2
+#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3
+
+#define MI_FLIP_CONTINUE 0x0
+#define MI_FLIP_ON 0x1
+#define MI_FLIP_OFF 0x2
+
+#define MI_UNTRUSTED_REGISTER_SPACE 0x0
+#define MI_TRUSTED_REGISTER_SPACE 0x1
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+#define _3DPRIMITIVE 0x00
+
+#define PIPE_CONTROL_NOWRITE 0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
+#define PIPE_CONTROL_WRITEDEPTH 0x02
+#define PIPE_CONTROL_WRITETIMESTAMP 0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define BRW_ANISORATIO_2 0
+#define BRW_ANISORATIO_4 1
+#define BRW_ANISORATIO_6 2
+#define BRW_ANISORATIO_8 3
+#define BRW_ANISORATIO_10 4
+#define BRW_ANISORATIO_12 5
+#define BRW_ANISORATIO_14 6
+#define BRW_ANISORATIO_16 7
+
+#define BRW_BLENDFACTOR_ONE 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR 0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
+#define BRW_BLENDFACTOR_DST_ALPHA 0x4
+#define BRW_BLENDFACTOR_DST_COLOR 0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define BRW_BLENDFACTOR_CONST_COLOR 0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define BRW_BLENDFACTOR_ZERO 0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define BRW_BLENDFUNCTION_ADD 0
+#define BRW_BLENDFUNCTION_SUBTRACT 1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define BRW_BLENDFUNCTION_MIN 3
+#define BRW_BLENDFUNCTION_MAX 4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8 0
+#define BRW_ALPHATEST_FORMAT_FLOAT32 1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define BRW_CHROMAKEY_REPLACE_BLACK 1
+
+#define BRW_CLIP_API_OGL 0
+#define BRW_CLIP_API_DX 1
+
+#define BRW_CLIPMODE_NORMAL 0
+#define BRW_CLIPMODE_CLIP_ALL 1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
+#define BRW_CLIPMODE_REJECT_ALL 3
+#define BRW_CLIPMODE_ACCEPT_ALL 4
+
+#define BRW_CLIP_NDCSPACE 0
+#define BRW_CLIP_SCREENSPACE 1
+
+#define BRW_COMPAREFUNCTION_ALWAYS 0
+#define BRW_COMPAREFUNCTION_NEVER 1
+#define BRW_COMPAREFUNCTION_LESS 2
+#define BRW_COMPAREFUNCTION_EQUAL 3
+#define BRW_COMPAREFUNCTION_LEQUAL 4
+#define BRW_COMPAREFUNCTION_GREATER 5
+#define BRW_COMPAREFUNCTION_NOTEQUAL 6
+#define BRW_COMPAREFUNCTION_GEQUAL 7
+
+#define BRW_COVERAGE_PIXELS_HALF 0
+#define BRW_COVERAGE_PIXELS_1 1
+#define BRW_COVERAGE_PIXELS_2 2
+#define BRW_COVERAGE_PIXELS_4 3
+
+#define BRW_CULLMODE_BOTH 0
+#define BRW_CULLMODE_NONE 1
+#define BRW_CULLMODE_FRONT 2
+#define BRW_CULLMODE_BACK 3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define BRW_DEPTHFORMAT_D32_FLOAT 1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define BRW_DEPTHFORMAT_D16_UNORM 5
+
+#define BRW_FLOATING_POINT_IEEE_754 0
+#define BRW_FLOATING_POINT_NON_IEEE_754 1
+
+#define BRW_FRONTWINDING_CW 0
+#define BRW_FRONTWINDING_CCW 1
+
+#define BRW_SPRITE_POINT_ENABLE 16
+
+#define BRW_INDEX_BYTE 0
+#define BRW_INDEX_WORD 1
+#define BRW_INDEX_DWORD 2
+
+#define BRW_LOGICOPFUNCTION_CLEAR 0
+#define BRW_LOGICOPFUNCTION_NOR 1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
+#define BRW_LOGICOPFUNCTION_INVERT 5
+#define BRW_LOGICOPFUNCTION_XOR 6
+#define BRW_LOGICOPFUNCTION_NAND 7
+#define BRW_LOGICOPFUNCTION_AND 8
+#define BRW_LOGICOPFUNCTION_EQUIV 9
+#define BRW_LOGICOPFUNCTION_NOOP 10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
+#define BRW_LOGICOPFUNCTION_COPY 12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
+#define BRW_LOGICOPFUNCTION_OR 14
+#define BRW_LOGICOPFUNCTION_SET 15
+
+#define BRW_MAPFILTER_NEAREST 0x0
+#define BRW_MAPFILTER_LINEAR 0x1
+#define BRW_MAPFILTER_ANISOTROPIC 0x2
+
+#define BRW_MIPFILTER_NONE 0
+#define BRW_MIPFILTER_NEAREST 1
+#define BRW_MIPFILTER_LINEAR 3
+
+#define BRW_POLYGON_FRONT_FACING 0
+#define BRW_POLYGON_BACK_FACING 1
+
+#define BRW_PREFILTER_ALWAYS 0x0
+#define BRW_PREFILTER_NEVER 0x1
+#define BRW_PREFILTER_LESS 0x2
+#define BRW_PREFILTER_EQUAL 0x3
+#define BRW_PREFILTER_LEQUAL 0x4
+#define BRW_PREFILTER_GREATER 0x5
+#define BRW_PREFILTER_NOTEQUAL 0x6
+#define BRW_PREFILTER_GEQUAL 0x7
+
+#define BRW_PROVOKING_VERTEX_0 0
+#define BRW_PROVOKING_VERTEX_1 1
+#define BRW_PROVOKING_VERTEX_2 2
+
+#define BRW_RASTRULE_UPPER_LEFT 0
+#define BRW_RASTRULE_UPPER_RIGHT 1
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define BRW_STENCILOP_KEEP 0
+#define BRW_STENCILOP_ZERO 1
+#define BRW_STENCILOP_REPLACE 2
+#define BRW_STENCILOP_INCRSAT 3
+#define BRW_STENCILOP_DECRSAT 4
+#define BRW_STENCILOP_INCR 5
+#define BRW_STENCILOP_DECR 6
+#define BRW_STENCILOP_INVERT 7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
+#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
+#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
+#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
+#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
+#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
+#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
+#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
+#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
+#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
+#define BRW_SURFACEFORMAT_R16_SINT 0x10C
+#define BRW_SURFACEFORMAT_R16_UINT 0x10D
+#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
+#define BRW_SURFACEFORMAT_I16_UNORM 0x111
+#define BRW_SURFACEFORMAT_L16_UNORM 0x112
+#define BRW_SURFACEFORMAT_A16_UNORM 0x113
+#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
+#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM 0x140
+#define BRW_SURFACEFORMAT_R8_SNORM 0x141
+#define BRW_SURFACEFORMAT_R8_SINT 0x142
+#define BRW_SURFACEFORMAT_R8_UINT 0x143
+#define BRW_SURFACEFORMAT_A8_UNORM 0x144
+#define BRW_SURFACEFORMAT_I8_UNORM 0x145
+#define BRW_SURFACEFORMAT_L8_UNORM 0x146
+#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
+#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
+#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
+#define BRW_SURFACEFORMAT_R1_UINT 0x181
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
+#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
+#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
+#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
+#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define BRW_SURFACEFORMAT_MONO8 0x18E
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
+#define BRW_SURFACEFORMAT_FXT1 0x192
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
+#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32 0
+#define BRW_SURFACERETURNFORMAT_S1 1
+
+#define BRW_SURFACE_1D 0
+#define BRW_SURFACE_2D 1
+#define BRW_SURFACE_3D 2
+#define BRW_SURFACE_CUBE 3
+#define BRW_SURFACE_BUFFER 4
+#define BRW_SURFACE_NULL 7
+
+#define BRW_TEXCOORDMODE_WRAP 0
+#define BRW_TEXCOORDMODE_MIRROR 1
+#define BRW_TEXCOORDMODE_CLAMP 2
+#define BRW_TEXCOORDMODE_CUBE 3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define BRW_THREAD_PRIORITY_NORMAL 0
+#define BRW_THREAD_PRIORITY_HIGH 1
+
+#define BRW_TILEWALK_XMAJOR 0
+#define BRW_TILEWALK_YMAJOR 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0
+#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
+
+#define BRW_VFCOMPONENT_NOSTORE 0
+#define BRW_VFCOMPONENT_STORE_SRC 1
+#define BRW_VFCOMPONENT_STORE_0 2
+#define BRW_VFCOMPONENT_STORE_1_FLT 3
+#define BRW_VFCOMPONENT_STORE_1_INT 4
+#define BRW_VFCOMPONENT_STORE_VID 5
+#define BRW_VFCOMPONENT_STORE_IID 6
+#define BRW_VFCOMPONENT_STORE_PID 7
+
+
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1 0
+#define BRW_ALIGN_16 1
+
+#define BRW_ADDRESS_DIRECT 0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define BRW_CHANNEL_X 0
+#define BRW_CHANNEL_Y 1
+#define BRW_CHANNEL_Z 2
+#define BRW_CHANNEL_W 3
+
+#define BRW_COMPRESSION_NONE 0
+#define BRW_COMPRESSION_2NDHALF 1
+#define BRW_COMPRESSION_COMPRESSED 2
+
+#define BRW_CONDITIONAL_NONE 0
+#define BRW_CONDITIONAL_Z 1
+#define BRW_CONDITIONAL_NZ 2
+#define BRW_CONDITIONAL_EQ 1 /* Z */
+#define BRW_CONDITIONAL_NEQ 2 /* NZ */
+#define BRW_CONDITIONAL_G 3
+#define BRW_CONDITIONAL_GE 4
+#define BRW_CONDITIONAL_L 5
+#define BRW_CONDITIONAL_LE 6
+#define BRW_CONDITIONAL_C 7
+#define BRW_CONDITIONAL_O 8
+
+#define BRW_DEBUG_NONE 0
+#define BRW_DEBUG_BREAKPOINT 1
+
+#define BRW_DEPENDENCY_NORMAL 0
+#define BRW_DEPENDENCY_NOTCLEARED 1
+#define BRW_DEPENDENCY_NOTCHECKED 2
+#define BRW_DEPENDENCY_DISABLE 3
+
+#define BRW_EXECUTE_1 0
+#define BRW_EXECUTE_2 1
+#define BRW_EXECUTE_4 2
+#define BRW_EXECUTE_8 3
+#define BRW_EXECUTE_16 4
+#define BRW_EXECUTE_32 5
+
+#define BRW_HORIZONTAL_STRIDE_0 0
+#define BRW_HORIZONTAL_STRIDE_1 1
+#define BRW_HORIZONTAL_STRIDE_2 2
+#define BRW_HORIZONTAL_STRIDE_4 3
+
+#define BRW_INSTRUCTION_NORMAL 0
+#define BRW_INSTRUCTION_SATURATE 1
+
+#define BRW_MASK_ENABLE 0
+#define BRW_MASK_DISABLE 1
+
+#define BRW_OPCODE_MOV 1
+#define BRW_OPCODE_SEL 2
+#define BRW_OPCODE_NOT 4
+#define BRW_OPCODE_AND 5
+#define BRW_OPCODE_OR 6
+#define BRW_OPCODE_XOR 7
+#define BRW_OPCODE_SHR 8
+#define BRW_OPCODE_SHL 9
+#define BRW_OPCODE_RSR 10
+#define BRW_OPCODE_RSL 11
+#define BRW_OPCODE_ASR 12
+#define BRW_OPCODE_CMP 16
+#define BRW_OPCODE_JMPI 32
+#define BRW_OPCODE_IF 34
+#define BRW_OPCODE_IFF 35
+#define BRW_OPCODE_ELSE 36
+#define BRW_OPCODE_ENDIF 37
+#define BRW_OPCODE_DO 38
+#define BRW_OPCODE_WHILE 39
+#define BRW_OPCODE_BREAK 40
+#define BRW_OPCODE_CONTINUE 41
+#define BRW_OPCODE_HALT 42
+#define BRW_OPCODE_MSAVE 44
+#define BRW_OPCODE_MRESTORE 45
+#define BRW_OPCODE_PUSH 46
+#define BRW_OPCODE_POP 47
+#define BRW_OPCODE_WAIT 48
+#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_ADD 64
+#define BRW_OPCODE_MUL 65
+#define BRW_OPCODE_AVG 66
+#define BRW_OPCODE_FRC 67
+#define BRW_OPCODE_RNDU 68
+#define BRW_OPCODE_RNDD 69
+#define BRW_OPCODE_RNDE 70
+#define BRW_OPCODE_RNDZ 71
+#define BRW_OPCODE_MAC 72
+#define BRW_OPCODE_MACH 73
+#define BRW_OPCODE_LZD 74
+#define BRW_OPCODE_SAD2 80
+#define BRW_OPCODE_SADA2 81
+#define BRW_OPCODE_DP4 84
+#define BRW_OPCODE_DPH 85
+#define BRW_OPCODE_DP3 86
+#define BRW_OPCODE_DP2 87
+#define BRW_OPCODE_DPA2 88
+#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_NOP 126
+
+#define BRW_PREDICATE_NONE 0
+#define BRW_PREDICATE_NORMAL 1
+#define BRW_PREDICATE_ALIGN1_ANYV 2
+#define BRW_PREDICATE_ALIGN1_ALLV 3
+#define BRW_PREDICATE_ALIGN1_ANY2H 4
+#define BRW_PREDICATE_ALIGN1_ALL2H 5
+#define BRW_PREDICATE_ALIGN1_ANY4H 6
+#define BRW_PREDICATE_ALIGN1_ALL4H 7
+#define BRW_PREDICATE_ALIGN1_ANY8H 8
+#define BRW_PREDICATE_ALIGN1_ALL8H 9
+#define BRW_PREDICATE_ALIGN1_ANY16H 10
+#define BRW_PREDICATE_ALIGN1_ALL16H 11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
+#define BRW_PREDICATE_ALIGN16_ANY4H 6
+#define BRW_PREDICATE_ALIGN16_ALL4H 7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE 0
+#define BRW_GENERAL_REGISTER_FILE 1
+#define BRW_MESSAGE_REGISTER_FILE 2
+#define BRW_IMMEDIATE_VALUE 3
+
+#define BRW_REGISTER_TYPE_UD 0
+#define BRW_REGISTER_TYPE_D 1
+#define BRW_REGISTER_TYPE_UW 2
+#define BRW_REGISTER_TYPE_W 3
+#define BRW_REGISTER_TYPE_UB 4
+#define BRW_REGISTER_TYPE_B 5
+#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF 6
+#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F 7
+
+#define BRW_ARF_NULL 0x00
+#define BRW_ARF_ADDRESS 0x10
+#define BRW_ARF_ACCUMULATOR 0x20
+#define BRW_ARF_FLAG 0x30
+#define BRW_ARF_MASK 0x40
+#define BRW_ARF_MASK_STACK 0x50
+#define BRW_ARF_MASK_STACK_DEPTH 0x60
+#define BRW_ARF_STATE 0x70
+#define BRW_ARF_CONTROL 0x80
+#define BRW_ARF_NOTIFICATION_COUNT 0x90
+#define BRW_ARF_IP 0xA0
+
+#define BRW_AMASK 0
+#define BRW_IMASK 1
+#define BRW_LMASK 2
+#define BRW_CMASK 3
+
+
+
+#define BRW_THREAD_NORMAL 0
+#define BRW_THREAD_ATOMIC 1
+#define BRW_THREAD_SWITCH 2
+
+#define BRW_VERTICAL_STRIDE_0 0
+#define BRW_VERTICAL_STRIDE_1 1
+#define BRW_VERTICAL_STRIDE_2 2
+#define BRW_VERTICAL_STRIDE_4 3
+#define BRW_VERTICAL_STRIDE_8 4
+#define BRW_VERTICAL_STRIDE_16 5
+#define BRW_VERTICAL_STRIDE_32 6
+#define BRW_VERTICAL_STRIDE_64 7
+#define BRW_VERTICAL_STRIDE_128 8
+#define BRW_VERTICAL_STRIDE_256 9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define BRW_WIDTH_1 0
+#define BRW_WIDTH_2 1
+#define BRW_WIDTH_4 2
+#define BRW_WIDTH_8 3
+#define BRW_WIDTH_16 4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define BRW_POLYGON_FACING_FRONT 0
+#define BRW_POLYGON_FACING_BACK 1
+
+#define BRW_MESSAGE_TARGET_NULL 0
+#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_SAMPLER 2
+#define BRW_MESSAGE_TARGET_GATEWAY 3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_URB 6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define BRW_MATH_FUNCTION_INV 1
+#define BRW_MATH_FUNCTION_LOG 2
+#define BRW_MATH_FUNCTION_EXP 3
+#define BRW_MATH_FUNCTION_SQRT 4
+#define BRW_MATH_FUNCTION_RSQ 5
+#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_POW 10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define BRW_MATH_INTEGER_UNSIGNED 0
+#define BRW_MATH_INTEGER_SIGNED 1
+
+#define BRW_MATH_PRECISION_FULL 0
+#define BRW_MATH_PRECISION_PARTIAL 1
+
+#define BRW_MATH_SATURATE_NONE 0
+#define BRW_MATH_SATURATE_SATURATE 1
+
+#define BRW_MATH_DATA_VECTOR 0
+#define BRW_MATH_DATA_SCALAR 1
+
+#define BRW_URB_OPCODE_WRITE 0
+
+#define BRW_URB_SWIZZLE_NONE 0
+#define BRW_URB_SWIZZLE_INTERLEAVE 1
+#define BRW_URB_SWIZZLE_TRANSPOSE 2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K 0
+#define BRW_SCRATCH_SPACE_SIZE_2K 1
+#define BRW_SCRATCH_SPACE_SIZE_4K 2
+#define BRW_SCRATCH_SPACE_SIZE_8K 3
+#define BRW_SCRATCH_SPACE_SIZE_16K 4
+#define BRW_SCRATCH_SPACE_SIZE_32K 5
+#define BRW_SCRATCH_SPACE_SIZE_64K 6
+#define BRW_SCRATCH_SPACE_SIZE_128K 7
+#define BRW_SCRATCH_SPACE_SIZE_256K 8
+#define BRW_SCRATCH_SPACE_SIZE_512K 9
+#define BRW_SCRATCH_SPACE_SIZE_1M 10
+#define BRW_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT 0x6104
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+#define CMD_VERTEX_BUFFER 0x7808
+#define CMD_VERTEX_ELEMENT 0x7809
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS 0x780b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_draw.c b/src/mesa/pipe/i965simple/brw_draw.c
new file mode 100644
index 0000000000..f443f41c6f
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_draw.c
@@ -0,0 +1,235 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdlib.h>
+
+#include "brw_batch.h"
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_winsys.h"
+
+static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = {
+ _3DPRIM_POINTLIST,
+ _3DPRIM_LINELIST,
+ _3DPRIM_LINELOOP,
+ _3DPRIM_LINESTRIP,
+ _3DPRIM_TRILIST,
+ _3DPRIM_TRISTRIP,
+ _3DPRIM_TRIFAN,
+ _3DPRIM_QUADLIST,
+ _3DPRIM_QUADSTRIP,
+ _3DPRIM_POLYGON
+};
+
+
+static const int reduced_prim[PIPE_PRIM_POLYGON+1] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES
+};
+
+
+/* When the primitive changes, set a state bit and re-validate. Not
+ * the nicest and would rather deal with this by having all the
+ * programs be immune to the active primitive (ie. cope with all
+ * possibilities). That may not be realistic however.
+ */
+static void brw_set_prim(struct brw_context *brw, int prim)
+{
+ PRINT("PRIM: %d\n", prim);
+
+ /* Slight optimization to avoid the GS program when not needed:
+ */
+ if (prim == PIPE_PRIM_QUAD_STRIP &&
+ brw->attribs.Raster->flatshade &&
+ brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_FILL &&
+ brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_FILL)
+ prim = PIPE_PRIM_TRIANGLE_STRIP;
+
+ if (prim != brw->primitive) {
+ brw->primitive = prim;
+ brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+ if (reduced_prim[prim] != brw->reduced_primitive) {
+ brw->reduced_primitive = reduced_prim[prim];
+ brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+ }
+
+ brw_validate_state(brw);
+ }
+
+}
+
+
+static unsigned trim(int prim, unsigned length)
+{
+ if (prim == PIPE_PRIM_QUAD_STRIP)
+ return length > 3 ? (length - length % 2) : 0;
+ else if (prim == PIPE_PRIM_QUADS)
+ return length - length % 4;
+ else
+ return length;
+}
+
+
+
+static boolean brw_emit_prim( struct brw_context *brw,
+ boolean indexed,
+ unsigned start,
+ unsigned count )
+
+{
+ struct brw_3d_primitive prim_packet;
+
+ if (BRW_DEBUG & DEBUG_PRIMS)
+ PRINT("PRIM: %d %d %d\n", brw->primitive, start, count);
+
+ prim_packet.header.opcode = CMD_3D_PRIM;
+ prim_packet.header.length = sizeof(prim_packet)/4 - 2;
+ prim_packet.header.pad = 0;
+ prim_packet.header.topology = hw_prim[brw->primitive];
+ prim_packet.header.indexed = indexed;
+
+ prim_packet.verts_per_instance = trim(brw->primitive, count);
+ prim_packet.start_vert_location = start;
+ prim_packet.instance_count = 1;
+ prim_packet.start_instance_location = 0;
+ prim_packet.base_vert_location = 0;
+
+ if (prim_packet.verts_per_instance == 0)
+ return TRUE;
+
+ return brw_batchbuffer_data( brw->winsys,
+ &prim_packet,
+ sizeof(prim_packet) );
+}
+
+
+/* May fail if out of video memory for texture or vbo upload, or on
+ * fallback conditions.
+ */
+static boolean brw_try_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer_handle *index_buffer,
+ unsigned index_size,
+ unsigned mode,
+ unsigned start,
+ unsigned count )
+{
+ struct brw_context *brw = brw_context(pipe);
+ boolean retval = FALSE;
+
+ /* Set the first primitive ahead of validate_state:
+ */
+ brw_set_prim(brw, mode);
+
+ /* Upload index, vertex data:
+ */
+ if (!brw_upload_indices( brw, index_buffer, index_size, start, count ))
+ return FALSE;
+
+ if (!brw_upload_vertex_elements( brw ))
+ return FALSE;
+
+ /* XXX: Need to separate validate and upload of state.
+ */
+ if (brw->state.dirty.brw)
+ brw_validate_state( brw );
+
+ if (brw_emit_prim(brw, TRUE, start, count))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+
+static boolean brw_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer_handle *indexBuffer,
+ unsigned indexSize,
+ unsigned mode,
+ unsigned start,
+ unsigned count )
+{
+ if (!brw_try_draw_elements( pipe,
+ indexBuffer,
+ indexSize,
+ mode, start, count ))
+ {
+ /* flush ? */
+
+ if (!brw_try_draw_elements( pipe,
+ indexBuffer,
+ indexSize,
+ mode, start,
+ count )) {
+ assert(0);
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+
+
+static boolean brw_draw_arrays( struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count )
+{
+ if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) {
+ /* flush ? */
+
+ if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) {
+ assert(0);
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+
+
+void brw_init_draw_functions( struct brw_context *brw )
+{
+ brw->pipe.draw_arrays = brw_draw_arrays;
+ brw->pipe.draw_elements = brw_draw_elements;
+}
+
+
diff --git a/src/mesa/pipe/i965simple/brw_draw.h b/src/mesa/pipe/i965simple/brw_draw.h
new file mode 100644
index 0000000000..053f2efb9d
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_draw.h
@@ -0,0 +1,55 @@
+ /**************************************************************************
+ *
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_DRAW_H
+#define BRW_DRAW_H
+
+#include "pipe/p_context.h"
+
+struct brw_context;
+
+
+
+void brw_init_draw_functions( struct brw_context *brw );
+
+
+boolean brw_upload_vertices( struct brw_context *brw,
+ unsigned min_index,
+ unsigned max_index );
+
+boolean brw_upload_indices(struct brw_context *brw,
+ const struct pipe_buffer_handle *index_buffer,
+ int ib_size, int start, int count);
+
+boolean brw_upload_vertex_buffers( struct brw_context *brw );
+boolean brw_upload_vertex_elements( struct brw_context *brw );
+
+unsigned brw_translate_surface_format( unsigned id );
+
+
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_draw_upload.c b/src/mesa/pipe/i965simple/brw_draw_upload.c
new file mode 100644
index 0000000000..186a6274fa
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_draw_upload.c
@@ -0,0 +1,306 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdlib.h>
+
+#include "brw_batch.h"
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+
+struct brw_array_state {
+ union header_union header;
+
+ struct {
+ union {
+ struct {
+ unsigned pitch:11;
+ unsigned pad:15;
+ unsigned access_type:1;
+ unsigned vb_index:5;
+ } bits;
+ unsigned dword;
+ } vb0;
+
+ struct pipe_buffer_handle *buffer;
+ unsigned offset;
+
+ unsigned max_index;
+ unsigned instance_data_step_rate;
+
+ } vb[BRW_VBP_MAX];
+};
+
+
+
+unsigned brw_translate_surface_format( unsigned id )
+{
+ switch (id) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return BRW_SURFACEFORMAT_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return BRW_SURFACEFORMAT_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return BRW_SURFACEFORMAT_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return BRW_SURFACEFORMAT_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return BRW_SURFACEFORMAT_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return BRW_SURFACEFORMAT_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return BRW_SURFACEFORMAT_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return BRW_SURFACEFORMAT_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return BRW_SURFACEFORMAT_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return BRW_SURFACEFORMAT_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return BRW_SURFACEFORMAT_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return BRW_SURFACEFORMAT_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return BRW_SURFACEFORMAT_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return BRW_SURFACEFORMAT_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static unsigned get_index_type(int type)
+{
+ switch (type) {
+ case 1: return BRW_INDEX_BYTE;
+ case 2: return BRW_INDEX_WORD;
+ case 4: return BRW_INDEX_DWORD;
+ default: assert(0); return 0;
+ }
+}
+
+
+boolean brw_upload_vertex_buffers( struct brw_context *brw )
+{
+ struct brw_array_state vbp;
+ unsigned i;
+ int nr_enabled = brw->vb.last_vb + 1;
+
+ memset(&vbp, 0, sizeof(vbp));
+
+ /* This is a hardware limit:
+ */
+ if (nr_enabled >= BRW_VEP_MAX)
+ return FALSE;
+
+ for (i = 0; i < nr_enabled; i++)
+ {
+ vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i].pitch;
+ vbp.vb[i].vb0.bits.pad = 0;
+ vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
+ vbp.vb[i].vb0.bits.vb_index = i;
+ vbp.vb[i].offset = brw->vb.vbo_array[i].buffer_offset;
+ vbp.vb[i].buffer = brw->vb.vbo_array[i].buffer;
+ vbp.vb[i].max_index = brw->vb.vbo_array[i].max_index;
+ }
+
+
+ vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
+ vbp.header.bits.opcode = CMD_VERTEX_BUFFER;
+
+ BEGIN_BATCH(vbp.header.bits.length+2, 0);
+ OUT_BATCH( vbp.header.dword );
+
+ for (i = 0; i < nr_enabled; i++) {
+ OUT_BATCH( vbp.vb[i].vb0.dword );
+ OUT_RELOC( vbp.vb[i].buffer, PIPE_BUFFER_FLAG_READ,
+ vbp.vb[i].offset);
+ OUT_BATCH( vbp.vb[i].max_index );
+ OUT_BATCH( vbp.vb[i].instance_data_step_rate );
+ }
+ ADVANCE_BATCH();
+ return TRUE;
+}
+
+
+
+boolean brw_upload_vertex_elements( struct brw_context *brw )
+{
+ struct brw_vertex_element_packet vep;
+
+ unsigned i;
+ unsigned nr_enabled = brw->attribs.VertexProgram->program.num_inputs;
+
+ memset(&vep, 0, sizeof(vep));
+
+ for (i = 0; i < nr_enabled; i++) {
+ struct brw_vertex_element *input = &brw->vb.inputs[i];
+
+ switch (brw->vb.vbo_array[input->vep.ve0.vertex_buffer_index].pitch) {
+ case 0: input->vep.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0;
+ case 1: input->vep.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
+ case 2: input->vep.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
+ case 3: input->vep.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
+ break;
+ }
+ vep.ve[i] = input->vep;
+ }
+
+
+ vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2;
+ vep.header.opcode = CMD_VERTEX_ELEMENT;
+ brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0]));
+
+ return TRUE;
+}
+
+boolean brw_upload_indices( struct brw_context *brw,
+ const struct pipe_buffer_handle *index_buffer,
+ int ib_size, int start, int count)
+{
+ /* Emit the indexbuffer packet:
+ */
+ {
+ struct brw_indexbuffer ib;
+
+ memset(&ib, 0, sizeof(ib));
+
+ ib.header.bits.opcode = CMD_INDEX_BUFFER;
+ ib.header.bits.length = sizeof(ib)/4 - 2;
+ ib.header.bits.index_format = get_index_type(ib_size);
+ ib.header.bits.cut_index_enable = 0;
+
+
+ BEGIN_BATCH(4, 0);
+ OUT_BATCH( ib.header.dword );
+ OUT_RELOC( index_buffer, PIPE_BUFFER_FLAG_READ, start);
+ OUT_RELOC( index_buffer, PIPE_BUFFER_FLAG_READ, start + count);
+ OUT_BATCH( 0 );
+ ADVANCE_BATCH();
+ }
+ return TRUE;
+}
diff --git a/src/mesa/pipe/i965simple/brw_eu.c b/src/mesa/pipe/i965simple/brw_eu.c
new file mode 100644
index 0000000000..e2002d1821
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_eu.c
@@ -0,0 +1,130 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+/* How does predicate control work when execution_size != 8? Do I
+ * need to test/set for 0xffff when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value )
+{
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ if (value != 0xff) {
+ if (value != p->flag_value) {
+ brw_push_insn_state(p);
+ brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+ p->flag_value = value;
+ brw_pop_insn_state(p);
+ }
+
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ }
+}
+
+void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
+{
+ p->current->header.predicate_control = pc;
+}
+
+void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
+{
+ p->current->header.destreg__conditonalmod = conditional;
+}
+
+void brw_set_access_mode( struct brw_compile *p, unsigned access_mode )
+{
+ p->current->header.access_mode = access_mode;
+}
+
+void brw_set_compression_control( struct brw_compile *p, boolean compression_control )
+{
+ p->current->header.compression_control = compression_control;
+}
+
+void brw_set_mask_control( struct brw_compile *p, unsigned value )
+{
+ p->current->header.mask_control = value;
+}
+
+void brw_set_saturate( struct brw_compile *p, unsigned value )
+{
+ p->current->header.saturate = value;
+}
+
+void brw_push_insn_state( struct brw_compile *p )
+{
+ assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+ memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
+ p->current++;
+}
+
+void brw_pop_insn_state( struct brw_compile *p )
+{
+ assert(p->current != p->stack);
+ p->current--;
+}
+
+
+/***********************************************************************
+ */
+void brw_init_compile( struct brw_compile *p )
+{
+ p->nr_insn = 0;
+ p->current = p->stack;
+ memset(p->current, 0, sizeof(p->current[0]));
+
+ /* Some defaults?
+ */
+ brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+ brw_set_saturate(p, 0);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+
+const unsigned *brw_get_program( struct brw_compile *p,
+ unsigned *sz )
+{
+ unsigned i;
+
+ for (i = 0; i < 8; i++)
+ brw_NOP(p);
+
+ *sz = p->nr_insn * sizeof(struct brw_instruction);
+ return (const unsigned *)p->store;
+}
+
diff --git a/src/mesa/pipe/i965simple/brw_eu.h b/src/mesa/pipe/i965simple/brw_eu.h
new file mode 100644
index 0000000000..111edb1506
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_eu.h
@@ -0,0 +1,877 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include "brw_structs.h"
+#include "brw_defines.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+
+
+#define REG_SIZE (8*4)
+
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges. Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ */
+struct brw_reg
+{
+ unsigned type:4;
+ unsigned file:2;
+ unsigned nr:8;
+ unsigned subnr:5; /* :1 in align16 */
+ unsigned negate:1; /* source only */
+ unsigned abs:1; /* source only */
+ unsigned vstride:4; /* source only */
+ unsigned width:3; /* src only, align1 only */
+ unsigned hstride:2; /* src only, align1 only */
+ unsigned address_mode:1; /* relative addressing, hopefully! */
+ unsigned pad0:1;
+
+ union {
+ struct {
+ unsigned swizzle:8; /* src only, align16 only */
+ unsigned writemask:4; /* dest only, align16 only */
+ int indirect_offset:10; /* relative addressing offset */
+ unsigned pad1:10; /* two dwords total */
+ } bits;
+
+ float f;
+ int d;
+ unsigned ud;
+ } dw1;
+};
+
+
+struct brw_indirect {
+ unsigned addr_subnr:4;
+ int addr_offset:10;
+ unsigned pad:18;
+};
+
+
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 1200
+
+struct brw_compile {
+ struct brw_instruction store[BRW_EU_MAX_INSN];
+ unsigned nr_insn;
+
+ /* Allow clients to push/pop instruction state:
+ */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_instruction *current;
+
+ unsigned flag_value;
+ boolean single_program_flow;
+};
+
+
+
+static __inline int type_sz( unsigned type )
+{
+ switch( type ) {
+ case BRW_REGISTER_TYPE_UD:
+ case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_F:
+ return 4;
+ case BRW_REGISTER_TYPE_HF:
+ case BRW_REGISTER_TYPE_UW:
+ case BRW_REGISTER_TYPE_W:
+ return 2;
+ case BRW_REGISTER_TYPE_UB:
+ case BRW_REGISTER_TYPE_B:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static __inline struct brw_reg brw_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr,
+ unsigned type,
+ unsigned vstride,
+ unsigned width,
+ unsigned hstride,
+ unsigned swizzle,
+ unsigned writemask)
+{
+
+ struct brw_reg reg;
+ reg.type = type;
+ reg.file = file;
+ reg.nr = nr;
+ reg.subnr = subnr * type_sz(type);
+ reg.negate = 0;
+ reg.abs = 0;
+ reg.vstride = vstride;
+ reg.width = width;
+ reg.hstride = hstride;
+ reg.address_mode = BRW_ADDRESS_DIRECT;
+ reg.pad0 = 0;
+
+ /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+ * set swizzle and writemask to W, as the lower bits of subnr will
+ * be lost when converted to align16. This is probably too much to
+ * keep track of as you'd want it adjusted by suboffset(), etc.
+ * Perhaps fix up when converting to align16?
+ */
+ reg.dw1.bits.swizzle = swizzle;
+ reg.dw1.bits.writemask = writemask;
+ reg.dw1.bits.indirect_offset = 0;
+ reg.dw1.bits.pad1 = 0;
+ return reg;
+}
+
+static __inline struct brw_reg brw_vec16_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_16,
+ BRW_WIDTH_16,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ TGSI_WRITEMASK_XYZW);
+}
+
+static __inline struct brw_reg brw_vec8_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ TGSI_WRITEMASK_XYZW);
+}
+
+
+static __inline struct brw_reg brw_vec4_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ TGSI_WRITEMASK_XYZW);
+}
+
+
+static __inline struct brw_reg brw_vec2_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYXY,
+ TGSI_WRITEMASK_XY);
+}
+
+static __inline struct brw_reg brw_vec1_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ TGSI_WRITEMASK_X);
+}
+
+
+static __inline struct brw_reg retype( struct brw_reg reg,
+ unsigned type )
+{
+ reg.type = type;
+ return reg;
+}
+
+static __inline struct brw_reg suboffset( struct brw_reg reg,
+ unsigned delta )
+{
+ reg.subnr += delta * type_sz(reg.type);
+ return reg;
+}
+
+
+static __inline struct brw_reg offset( struct brw_reg reg,
+ unsigned delta )
+{
+ reg.nr += delta;
+ return reg;
+}
+
+
+static __inline struct brw_reg byte_offset( struct brw_reg reg,
+ unsigned bytes )
+{
+ unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
+ reg.nr = newoffset / REG_SIZE;
+ reg.subnr = newoffset % REG_SIZE;
+ return reg;
+}
+
+
+static __inline struct brw_reg brw_uw16_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static __inline struct brw_reg brw_uw8_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static __inline struct brw_reg brw_uw1_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static __inline struct brw_reg brw_imm_reg( unsigned type )
+{
+ return brw_reg( BRW_IMMEDIATE_VALUE,
+ 0,
+ 0,
+ type,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ 0,
+ 0);
+}
+
+static __inline struct brw_reg brw_imm_f( float f )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
+ imm.dw1.f = f;
+ return imm;
+}
+
+static __inline struct brw_reg brw_imm_d( int d )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
+ imm.dw1.d = d;
+ return imm;
+}
+
+static __inline struct brw_reg brw_imm_ud( unsigned ud )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+ imm.dw1.ud = ud;
+ return imm;
+}
+
+static __inline struct brw_reg brw_imm_uw( ushort uw )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+ imm.dw1.ud = uw;
+ return imm;
+}
+
+static __inline struct brw_reg brw_imm_w( short w )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+ imm.dw1.d = w;
+ return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/* Vector of eight signed half-byte values:
+ */
+static __inline struct brw_reg brw_imm_v( unsigned v )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_8;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+/* Vector of four 8-bit float values:
+ */
+static __inline struct brw_reg brw_imm_vf( unsigned v )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+#define VF_ZERO 0x0
+#define VF_ONE 0x30
+#define VF_NEG (1<<7)
+
+static __inline struct brw_reg brw_imm_vf4( unsigned v0,
+ unsigned v1,
+ unsigned v2,
+ unsigned v3)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = ((v0 << 0) |
+ (v1 << 8) |
+ (v2 << 16) |
+ (v3 << 24));
+ return imm;
+}
+
+
+static __inline struct brw_reg brw_address( struct brw_reg reg )
+{
+ return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
+}
+
+
+static __inline struct brw_reg brw_vec1_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static __inline struct brw_reg brw_vec8_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static __inline struct brw_reg brw_vec4_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+static __inline struct brw_reg brw_vec2_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static __inline struct brw_reg brw_uw8_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static __inline struct brw_reg brw_null_reg( void )
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NULL,
+ 0);
+}
+
+static __inline struct brw_reg brw_address_reg( unsigned subnr )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ADDRESS,
+ subnr);
+}
+
+/* If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw. This goes against the convention for other scalar
+ * regs:
+ */
+static __inline struct brw_reg brw_ip_reg( void )
+{
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_IP,
+ 0,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_4, /* ? */
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, /* NOTE! */
+ TGSI_WRITEMASK_XYZW); /* NOTE! */
+}
+
+static __inline struct brw_reg brw_acc_reg( void )
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ACCUMULATOR,
+ 0);
+}
+
+
+static __inline struct brw_reg brw_flag_reg( void )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_FLAG,
+ 0);
+}
+
+
+static __inline struct brw_reg brw_mask_reg( unsigned subnr )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_MASK,
+ subnr);
+}
+
+static __inline struct brw_reg brw_message_reg( unsigned nr )
+{
+ return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
+ nr,
+ 0);
+}
+
+
+
+
+/* This is almost always called with a numeric constant argument, so
+ * make things easy to evaluate at compile time:
+ */
+static __inline unsigned cvt( unsigned val )
+{
+ switch (val) {
+ case 0: return 0;
+ case 1: return 1;
+ case 2: return 2;
+ case 4: return 3;
+ case 8: return 4;
+ case 16: return 5;
+ case 32: return 6;
+ }
+ return 0;
+}
+
+static __inline struct brw_reg stride( struct brw_reg reg,
+ unsigned vstride,
+ unsigned width,
+ unsigned hstride )
+{
+
+ reg.vstride = cvt(vstride);
+ reg.width = cvt(width) - 1;
+ reg.hstride = cvt(hstride);
+ return reg;
+}
+
+static __inline struct brw_reg vec16( struct brw_reg reg )
+{
+ return stride(reg, 16,16,1);
+}
+
+static __inline struct brw_reg vec8( struct brw_reg reg )
+{
+ return stride(reg, 8,8,1);
+}
+
+static __inline struct brw_reg vec4( struct brw_reg reg )
+{
+ return stride(reg, 4,4,1);
+}
+
+static __inline struct brw_reg vec2( struct brw_reg reg )
+{
+ return stride(reg, 2,2,1);
+}
+
+static __inline struct brw_reg vec1( struct brw_reg reg )
+{
+ return stride(reg, 0,1,0);
+}
+
+static __inline struct brw_reg get_element( struct brw_reg reg, unsigned elt )
+{
+ return vec1(suboffset(reg, elt));
+}
+
+static __inline struct brw_reg get_element_ud( struct brw_reg reg, unsigned elt )
+{
+ return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
+}
+
+
+static __inline struct brw_reg brw_swizzle( struct brw_reg reg,
+ unsigned x,
+ unsigned y,
+ unsigned z,
+ unsigned w)
+{
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+ return reg;
+}
+
+
+static __inline struct brw_reg brw_swizzle1( struct brw_reg reg,
+ unsigned x )
+{
+ return brw_swizzle(reg, x, x, x, x);
+}
+
+static __inline struct brw_reg brw_writemask( struct brw_reg reg,
+ unsigned mask )
+{
+ reg.dw1.bits.writemask &= mask;
+ return reg;
+}
+
+static __inline struct brw_reg brw_set_writemask( struct brw_reg reg,
+ unsigned mask )
+{
+ reg.dw1.bits.writemask = mask;
+ return reg;
+}
+
+static __inline struct brw_reg negate( struct brw_reg reg )
+{
+ reg.negate ^= 1;
+ return reg;
+}
+
+static __inline struct brw_reg brw_abs( struct brw_reg reg )
+{
+ reg.abs = 1;
+ return reg;
+}
+
+/***********************************************************************
+ */
+static __inline struct brw_reg brw_vec4_indirect( unsigned subnr,
+ int offset )
+{
+ struct brw_reg reg = brw_vec4_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static __inline struct brw_reg brw_vec1_indirect( unsigned subnr,
+ int offset )
+{
+ struct brw_reg reg = brw_vec1_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static __inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
+{
+ return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static __inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
+{
+ return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static __inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
+}
+
+static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
+}
+
+static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
+}
+
+static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+ return brw_address_reg(ptr.addr_subnr);
+}
+
+static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, int offset )
+{
+ ptr.addr_offset += offset;
+ return ptr;
+}
+
+static __inline struct brw_indirect brw_indirect( unsigned addr_subnr, int offset )
+{
+ struct brw_indirect ptr;
+ ptr.addr_subnr = addr_subnr;
+ ptr.addr_offset = offset;
+ ptr.pad = 0;
+ return ptr;
+}
+
+static __inline struct brw_instruction *current_insn( struct brw_compile *p)
+{
+ return &p->store[p->nr_insn];
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, unsigned value );
+void brw_set_saturate( struct brw_compile *p, unsigned value );
+void brw_set_access_mode( struct brw_compile *p, unsigned access_mode );
+void brw_set_compression_control( struct brw_compile *p, boolean control );
+void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );
+void brw_set_predicate_control( struct brw_compile *p, unsigned pc );
+void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional );
+
+void brw_init_compile( struct brw_compile *p );
+const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz );
+
+
+/* Helpers for regular instructions:
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0);
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+#undef ALU1
+#undef ALU2
+
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ boolean allocate,
+ boolean used,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean eot,
+ boolean writes_complete,
+ unsigned offset,
+ unsigned swizzle);
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean eot);
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned sampler,
+ unsigned writemask,
+ unsigned msg_type,
+ unsigned response_length,
+ unsigned msg_length,
+ boolean eot);
+
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned saturate,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned precision );
+
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned saturate,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned data_type,
+ unsigned precision );
+
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ unsigned scratch_offset );
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ unsigned msg_reg_nr,
+ unsigned scratch_offset );
+
+/* If/else/endif. Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p,
+ unsigned execute_size);
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn);
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *if_or_else_insn);
+
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+ unsigned execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *patch_insn);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned conditional,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+void brw_print_reg( struct brw_reg reg );
+
+
+/***********************************************************************
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ unsigned count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ unsigned count);
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count);
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count);
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src);
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg );
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_eu_debug.c b/src/mesa/pipe/i965simple/brw_eu_debug.c
new file mode 100644
index 0000000000..be692f6502
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_eu_debug.c
@@ -0,0 +1,88 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_eu.h"
+
+void brw_print_reg( struct brw_reg hwreg )
+{
+ static const char *file[] = {
+ "arf",
+ "grf",
+ "msg",
+ "imm"
+ };
+
+ static const char *type[] = {
+ "ud",
+ "d",
+ "uw",
+ "w",
+ "ub",
+ "vf",
+ "hf",
+ "f"
+ };
+
+ _mesa_printf("%s%s",
+ hwreg.abs ? "abs/" : "",
+ hwreg.negate ? "-" : "");
+
+ if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+ hwreg.nr % 2 == 0 &&
+ hwreg.subnr == 0 &&
+ hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+ hwreg.width == BRW_WIDTH_8 &&
+ hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ hwreg.type == BRW_REGISTER_TYPE_F) {
+ _mesa_printf("vec%d", hwreg.nr);
+ }
+ else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+ hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+ hwreg.width == BRW_WIDTH_1 &&
+ hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
+ hwreg.type == BRW_REGISTER_TYPE_F) {
+ _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+ }
+ else {
+ _mesa_printf("%s%d.%d<%d;%d,%d>:%s",
+ file[hwreg.file],
+ hwreg.nr,
+ hwreg.subnr / type_sz(hwreg.type),
+ hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+ 1<<hwreg.width,
+ hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+ type[hwreg.type]);
+ }
+}
+
+
+
diff --git a/src/mesa/pipe/i965simple/brw_eu_emit.c b/src/mesa/pipe/i965simple/brw_eu_emit.c
new file mode 100644
index 0000000000..bda63e8b9a
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_eu_emit.c
@@ -0,0 +1,1080 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+static void guess_execution_size( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ if (reg.width == BRW_WIDTH_8 &&
+ insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
+ insn->header.execution_size = BRW_EXECUTE_16;
+ else
+ insn->header.execution_size = reg.width; /* note - definitions are compatible */
+}
+
+
+static void brw_set_dest( struct brw_instruction *insn,
+ struct brw_reg dest )
+{
+ insn->bits1.da1.dest_reg_file = dest.file;
+ insn->bits1.da1.dest_reg_type = dest.type;
+ insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+ if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+ insn->bits1.da1.dest_reg_nr = dest.nr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.da1.dest_subreg_nr = dest.subnr;
+ insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ }
+ else {
+ insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+ insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+ }
+ }
+ else {
+ insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+
+ /* These are different sizes in align1 vs align16:
+ */
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ }
+ else {
+ insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ }
+ }
+
+ /* NEW: Set the execution size based on dest.width and
+ * insn->compression_control:
+ */
+ guess_execution_size(insn, dest);
+}
+
+static void brw_set_src0( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ insn->bits1.da1.src0_reg_file = reg.file;
+ insn->bits1.da1.src0_reg_type = reg.type;
+ insn->bits2.da1.src0_abs = reg.abs;
+ insn->bits2.da1.src0_negate = reg.negate;
+ insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.dw1.ud;
+
+ /* Required to set some fields in src1 as well:
+ */
+ insn->bits1.da1.src1_reg_file = 0; /* arf */
+ insn->bits1.da1.src1_reg_type = reg.type;
+ }
+ else
+ {
+ if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.da1.src0_subreg_nr = reg.subnr;
+ insn->bits2.da1.src0_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+ insn->bits2.da16.src0_reg_nr = reg.nr;
+ }
+ }
+ else {
+ insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+ }
+ else {
+ insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+ }
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits2.da1.src0_width = BRW_WIDTH_1;
+ insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits2.da1.src0_horiz_stride = reg.hstride;
+ insn->bits2.da1.src0_width = reg.width;
+ insn->bits2.da1.src0_vert_stride = reg.vstride;
+ }
+ }
+ else {
+ insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits2.da16.src0_vert_stride = reg.vstride;
+ }
+ }
+}
+
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ insn->bits1.da1.src1_reg_file = reg.file;
+ insn->bits1.da1.src1_reg_type = reg.type;
+ insn->bits3.da1.src1_abs = reg.abs;
+ insn->bits3.da1.src1_negate = reg.negate;
+
+ /* Only src1 can be immediate in two-argument instructions.
+ */
+ assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.dw1.ud;
+ }
+ else {
+ /* This is a hardware restriction, which may or may not be lifted
+ * in the future:
+ */
+ assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+ //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits3.da1.src1_subreg_nr = reg.subnr;
+ insn->bits3.da1.src1_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+ insn->bits3.da16.src1_reg_nr = reg.nr;
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits3.da1.src1_width = BRW_WIDTH_1;
+ insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits3.da1.src1_horiz_stride = reg.hstride;
+ insn->bits3.da1.src1_width = reg.width;
+ insn->bits3.da1.src1_vert_stride = reg.vstride;
+ }
+ }
+ else {
+ insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits3.da16.src1_vert_stride = reg.vstride;
+ }
+ }
+}
+
+
+
+static void brw_set_math_message( struct brw_instruction *insn,
+ unsigned msg_length,
+ unsigned response_length,
+ unsigned function,
+ unsigned integer_type,
+ boolean low_precision,
+ boolean saturate,
+ unsigned dataType )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.math.function = function;
+ insn->bits3.math.int_type = integer_type;
+ insn->bits3.math.precision = low_precision;
+ insn->bits3.math.saturate = saturate;
+ insn->bits3.math.data_type = dataType;
+ insn->bits3.math.response_length = response_length;
+ insn->bits3.math.msg_length = msg_length;
+ insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+ insn->bits3.math.end_of_thread = 0;
+}
+
+static void brw_set_urb_message( struct brw_instruction *insn,
+ boolean allocate,
+ boolean used,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean end_of_thread,
+ boolean complete,
+ unsigned offset,
+ unsigned swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.urb.opcode = 0; /* ? */
+ insn->bits3.urb.offset = offset;
+ insn->bits3.urb.swizzle_control = swizzle_control;
+ insn->bits3.urb.allocate = allocate;
+ insn->bits3.urb.used = used; /* ? */
+ insn->bits3.urb.complete = complete;
+ insn->bits3.urb.response_length = response_length;
+ insn->bits3.urb.msg_length = msg_length;
+ insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+ insn->bits3.urb.end_of_thread = end_of_thread;
+}
+
+static void brw_set_dp_write_message( struct brw_instruction *insn,
+ unsigned binding_table_index,
+ unsigned msg_control,
+ unsigned msg_type,
+ unsigned msg_length,
+ unsigned pixel_scoreboard_clear,
+ unsigned response_length,
+ unsigned end_of_thread )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.dp_write.binding_table_index = binding_table_index;
+ insn->bits3.dp_write.msg_control = msg_control;
+ insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write.msg_type = msg_type;
+ insn->bits3.dp_write.send_commit_msg = 0;
+ insn->bits3.dp_write.response_length = response_length;
+ insn->bits3.dp_write.msg_length = msg_length;
+ insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits3.urb.end_of_thread = end_of_thread;
+}
+
+static void brw_set_dp_read_message( struct brw_instruction *insn,
+ unsigned binding_table_index,
+ unsigned msg_control,
+ unsigned msg_type,
+ unsigned target_cache,
+ unsigned msg_length,
+ unsigned response_length,
+ unsigned end_of_thread )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.dp_read.binding_table_index = binding_table_index;
+ insn->bits3.dp_read.msg_control = msg_control;
+ insn->bits3.dp_read.msg_type = msg_type;
+ insn->bits3.dp_read.target_cache = target_cache;
+ insn->bits3.dp_read.response_length = response_length;
+ insn->bits3.dp_read.msg_length = msg_length;
+ insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits3.dp_read.end_of_thread = end_of_thread;
+}
+
+static void brw_set_sampler_message( struct brw_instruction *insn,
+ unsigned binding_table_index,
+ unsigned sampler,
+ unsigned msg_type,
+ unsigned response_length,
+ unsigned msg_length,
+ boolean eot)
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.sampler.binding_table_index = binding_table_index;
+ insn->bits3.sampler.sampler = sampler;
+ insn->bits3.sampler.msg_type = msg_type;
+ insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+ insn->bits3.sampler.response_length = response_length;
+ insn->bits3.sampler.msg_length = msg_length;
+ insn->bits3.sampler.end_of_thread = eot;
+ insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+}
+
+
+
+static struct brw_instruction *next_insn( struct brw_compile *p,
+ unsigned opcode )
+{
+ struct brw_instruction *insn;
+
+ assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+
+ insn = &p->store[p->nr_insn++];
+ memcpy(insn, p->current, sizeof(*insn));
+
+ /* Reset this one-shot flag:
+ */
+
+ if (p->current->header.destreg__conditonalmod) {
+ p->current->header.destreg__conditonalmod = 0;
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ }
+
+ insn->header.opcode = opcode;
+ return insn;
+}
+
+
+static struct brw_instruction *brw_alu1( struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src )
+{
+ struct brw_instruction *insn = next_insn(p, opcode);
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ return insn;
+}
+
+static struct brw_instruction *brw_alu2(struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1 )
+{
+ struct brw_instruction *insn = next_insn(p, opcode);
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, src1);
+ return insn;
+}
+
+
+/***********************************************************************
+ * Convenience routines.
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0) \
+{ \
+ return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1) \
+{ \
+ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+
+
+
+void brw_NOP(struct brw_compile *p)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
+ brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ return insn;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack). Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevent flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off. If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size)
+{
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow) {
+ assert(execute_size == BRW_EXECUTE_1);
+
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ insn->header.predicate_inverse = 1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_IF);
+ }
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.execution_size = execute_size;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ return insn;
+}
+
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn)
+{
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ } else {
+ insn = next_insn(p, BRW_OPCODE_ELSE);
+ }
+
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = if_insn->header.execution_size;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+
+ /* Patch the if instruction to point at this instruction.
+ */
+ if (p->single_program_flow) {
+ assert(if_insn->header.opcode == BRW_OPCODE_ADD);
+
+ if_insn->bits3.ud = (insn - if_insn + 1) * 16;
+ } else {
+ assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+ if_insn->bits3.if_else.jump_count = insn - if_insn;
+ if_insn->bits3.if_else.pop_count = 1;
+ if_insn->bits3.if_else.pad0 = 0;
+ }
+
+ return insn;
+}
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *patch_insn)
+{
+ if (p->single_program_flow) {
+ /* In single program flow mode, there's no need to execute an ENDIF,
+ * since we don't need to do any stack operations, and if we're executing
+ * currently, we want to just continue executing.
+ */
+ struct brw_instruction *next = &p->store[p->nr_insn];
+
+ assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+
+ patch_insn->bits3.ud = (next - patch_insn) * 16;
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+ brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = patch_insn->header.execution_size;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+
+ assert(patch_insn->bits3.if_else.jump_count == 0);
+
+ /* Patch the if or else instructions to point at this or the next
+ * instruction respectively.
+ */
+ if (patch_insn->header.opcode == BRW_OPCODE_IF) {
+ /* Automagically turn it into an IFF:
+ */
+ patch_insn->header.opcode = BRW_OPCODE_IFF;
+ patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.pop_count = 0;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
+ patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.pop_count = 1;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else {
+ assert(0);
+ }
+
+ /* Also pop item off the stack in the endif instruction:
+ */
+ insn->bits3.if_else.jump_count = 0;
+ insn->bits3.if_else.pop_count = 1;
+ insn->bits3.if_else.pad0 = 0;
+ }
+}
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+ struct brw_instruction *insn;
+ insn = next_insn(p, BRW_OPCODE_BREAK);
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ insn->bits3.if_else.pad0 = 0;
+ return insn;
+}
+
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+ struct brw_instruction *insn;
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ insn->bits3.if_else.pad0 = 0;
+ return insn;
+}
+
+/* DO/WHILE loop:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
+{
+ if (p->single_program_flow) {
+ return &p->store[p->nr_insn];
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, brw_null_reg());
+ brw_set_src0(insn, brw_null_reg());
+ brw_set_src1(insn, brw_null_reg());
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = execute_size;
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ /* insn->header.mask_control = BRW_MASK_ENABLE; */
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ return insn;
+ }
+}
+
+
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow)
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ else
+ insn = next_insn(p, BRW_OPCODE_WHILE);
+
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+ if (p->single_program_flow) {
+ insn->header.execution_size = BRW_EXECUTE_1;
+
+ insn->bits3.d = (do_insn - insn) * 16;
+ } else {
+ insn->header.execution_size = do_insn->header.execution_size;
+
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
+ insn->bits3.if_else.jump_count = do_insn - insn;
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+
+/* insn->header.mask_control = BRW_MASK_ENABLE; */
+
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+ return insn;
+}
+
+
+/* FORWARD JUMPS:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn)
+{
+ struct brw_instruction *landing = &p->store[p->nr_insn];
+
+ assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+ assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
+
+ jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register. It might be simpler
+ * just to use the flag reg for most WM tasks?
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned conditional,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
+
+ insn->header.destreg__conditonalmod = conditional;
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, src1);
+
+/* guess_execution_size(insn, src0); */
+
+
+ /* Make it so that future instructions will use the computed flag
+ * value until brw_set_predicate_control_flag_value() is called
+ * again.
+ */
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.nr == 0) {
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ p->flag_value = 0xff;
+ }
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/* Invert 8 values
+ */
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned saturate,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned data_type,
+ unsigned precision )
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* Example code doesn't set predicate_control for send
+ * instructions.
+ */
+ insn->header.predicate_control = 0;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ data_type);
+}
+
+/* Use 2 send instructions to invert 16 elements
+ */
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned saturate,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned precision )
+{
+ struct brw_instruction *insn;
+ unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* First instruction:
+ */
+ brw_push_insn_state(p);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ /* Second instruction:
+ */
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+ insn->header.destreg__conditonalmod = msg_reg_nr+1;
+
+ brw_set_dest(insn, offset(dest,1));
+ brw_set_src0(insn, src);
+ brw_set_math_message(insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ brw_pop_insn_state(p);
+}
+
+
+
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ unsigned msg_reg_nr,
+ unsigned scratch_offset )
+{
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ unsigned msg_length = 3;
+ struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+
+ brw_set_dp_write_message(insn,
+ 255, /* bti */
+ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+ msg_length,
+ 0, /* pixel scoreboard */
+ 0, /* response_length */
+ 0); /* eot */
+ }
+
+}
+
+
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ unsigned scratch_offset )
+{
+ {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest); /* UW? */
+ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+ brw_set_dp_read_message(insn,
+ 255, /* bti */
+ 3, /* msg_control */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 1, /* target cache */
+ 1, /* msg_length */
+ 2, /* response_length */
+ 0); /* eot */
+ }
+}
+
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean eot)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_dp_write_message(insn,
+ binding_table_index,
+ BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+ msg_length,
+ 1, /* pixel scoreboard */
+ response_length,
+ eot);
+}
+
+
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned sampler,
+ unsigned writemask,
+ unsigned msg_type,
+ unsigned response_length,
+ unsigned msg_length,
+ boolean eot)
+{
+ boolean need_stall = 0;
+
+ if(writemask == 0) {
+/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+ return;
+ }
+
+ /* Hardware doesn't do destination dependency checking on send
+ * instructions properly. Add a workaround which generates the
+ * dependency by other means. In practice it seems like this bug
+ * only crops up for texture samples, and only where registers are
+ * written by the send and then written again later without being
+ * read in between. Luckily for us, we already track that
+ * information and use it to modify the writemask for the
+ * instruction, so that is a guide for whether a workaround is
+ * needed.
+ */
+ if (writemask != TGSI_WRITEMASK_XYZW) {
+ unsigned dst_offset = 0;
+ unsigned i, newmask = 0, len = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i))
+ break;
+ dst_offset += 2;
+ }
+ for (; i < 4; i++) {
+ if (!(writemask & (1<<i)))
+ break;
+ newmask |= 1<<i;
+ len++;
+ }
+
+ if (newmask != writemask) {
+ need_stall = 1;
+/* _mesa_printf("need stall %x %x\n", newmask , writemask); */
+ }
+ else {
+ struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+ newmask = ~newmask & TGSI_WRITEMASK_XYZW;
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, m1, brw_vec8_grf(0,0));
+ brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
+
+ brw_pop_insn_state(p);
+
+ src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ dest = offset(dest, dst_offset);
+ response_length = len * 2;
+ }
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_sampler_message(insn,
+ binding_table_index,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ eot);
+ }
+
+ if (need_stall)
+ {
+ struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+ /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, FALSE);
+ brw_MOV(p, reg, reg);
+ brw_pop_insn_state(p);
+ }
+
+}
+
+/* All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style. Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ boolean allocate,
+ boolean used,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean eot,
+ boolean writes_complete,
+ unsigned offset,
+ unsigned swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < 16);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_urb_message(insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
+
diff --git a/src/mesa/pipe/i965simple/brw_eu_util.c b/src/mesa/pipe/i965simple/brw_eu_util.c
new file mode 100644
index 0000000000..3a65b141f0
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ brw_math( p,
+ dst,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ src,
+ BRW_MATH_PRECISION_FULL,
+ BRW_MATH_DATA_VECTOR );
+}
+
+
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count)
+{
+ unsigned i;
+
+ dst = vec4(dst);
+ src = vec4(src);
+
+ for (i = 0; i < count; i++)
+ {
+ unsigned delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
+ }
+}
+
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count)
+{
+ unsigned i;
+
+ dst = vec8(dst);
+ src = vec8(src);
+
+ for (i = 0; i < count; i++)
+ {
+ unsigned delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ }
+}
+
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ unsigned count)
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ {
+ unsigned delta = i*32;
+ brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta));
+ brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
+ }
+}
+
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ unsigned count)
+{
+ unsigned i;
+
+ dst = vec4(dst);
+
+ for (i = 0; i < count; i++)
+ {
+ unsigned delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
+ }
+}
+
+
+
+
diff --git a/src/mesa/pipe/i965simple/brw_flush.c b/src/mesa/pipe/i965simple/brw_flush.c
new file mode 100644
index 0000000000..1f11c5f164
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_flush.c
@@ -0,0 +1,84 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_defines.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_batch.h"
+
+
+/**
+ * In future we may want a fence-like interface instead of finish.
+ */
+static void brw_flush( struct pipe_context *pipe,
+ unsigned flags )
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct pipe_fence_handle *fence;
+
+ /* Do we need to emit an MI_FLUSH command to flush the hardware
+ * caches?
+ */
+ if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) {
+ unsigned flush = CMD_MI_FLUSH;
+
+#if 0
+ if (!(flags & PIPE_FLUSH_RENDER_CACHE))
+ flush |= INHIBIT_FLUSH_RENDER_CACHE;
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE)
+ flush |= FLUSH_MAP_CACHE;
+#endif
+
+ if (!BEGIN_BATCH(1, 0)) {
+ FLUSH_BATCH( &fence );
+ assert(BEGIN_BATCH(1, 0));
+ }
+ OUT_BATCH( flush );
+ ADVANCE_BATCH();
+ }
+
+ /* If there are no flags, just flush pending commands to hardware:
+ */
+ FLUSH_BATCH( &fence );
+
+ if (flags & PIPE_FLUSH_WAIT) {
+// brw->winsys->wait_fence(brw->winsys, fence);
+ }
+}
+
+
+
+void brw_init_flush_functions( struct brw_context *brw )
+{
+ brw->pipe.flush = brw_flush;
+}
diff --git a/src/mesa/pipe/i965simple/brw_gs.c b/src/mesa/pipe/i965simple/brw_gs.c
new file mode 100644
index 0000000000..de60868ccc
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_gs.c
@@ -0,0 +1,196 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+
+static void compile_gs_prog( struct brw_context *brw,
+ struct brw_gs_prog_key *key )
+{
+ struct brw_gs_compile c;
+ const unsigned *program;
+ unsigned program_size;
+
+ memset(&c, 0, sizeof(c));
+
+ c.key = *key;
+
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.nr_attrs = brw_count_bits(c.key.attrs);
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(&c.func);
+
+ c.func.single_program_flow = 1;
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Note that primitives which don't require a GS program have
+ * already been weeded out by this stage:
+ */
+ switch (key->primitive) {
+ case PIPE_PRIM_QUADS:
+ brw_gs_quads( &c );
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ brw_gs_quad_strip( &c );
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ brw_gs_lines( &c );
+ break;
+ case PIPE_PRIM_LINES:
+ if (key->hint_gs_always)
+ brw_gs_lines( &c );
+ else {
+ return;
+ }
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ if (key->hint_gs_always)
+ brw_gs_tris( &c );
+ else {
+ return;
+ }
+ break;
+ case PIPE_PRIM_POINTS:
+ if (key->hint_gs_always)
+ brw_gs_points( &c );
+ else {
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /* Upload
+ */
+ brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG],
+ &c.key,
+ sizeof(c.key),
+ program,
+ program_size,
+ &c.prog_data,
+ &brw->gs.prog_data );
+}
+
+
+static boolean search_cache( struct brw_context *brw,
+ struct brw_gs_prog_key *key )
+{
+ return brw_search_cache(&brw->cache[BRW_GS_PROG],
+ key, sizeof(*key),
+ &brw->gs.prog_data,
+ &brw->gs.prog_gs_offset);
+}
+
+
+static const int gs_prim[PIPE_PRIM_POLYGON+1] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINE_LOOP,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_QUADS,
+ PIPE_PRIM_QUAD_STRIP,
+ PIPE_PRIM_TRIANGLES
+};
+
+static void populate_key( struct brw_context *brw,
+ struct brw_gs_prog_key *key )
+{
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_VS_PROG */
+ key->attrs = brw->vs.prog_data->outputs_written;
+
+ /* BRW_NEW_PRIMITIVE */
+ key->primitive = gs_prim[brw->primitive];
+
+ key->hint_gs_always = 0; /* debug code? */
+
+ key->need_gs_prog = (key->hint_gs_always ||
+ brw->primitive == PIPE_PRIM_QUADS ||
+ brw->primitive == PIPE_PRIM_QUAD_STRIP ||
+ brw->primitive == PIPE_PRIM_LINE_LOOP);
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_gs_prog( struct brw_context *brw )
+{
+ struct brw_gs_prog_key key;
+
+ /* Populate the key:
+ */
+ populate_key(brw, &key);
+
+ if (brw->gs.prog_active != key.need_gs_prog) {
+ brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+ brw->gs.prog_active = key.need_gs_prog;
+ }
+
+ if (brw->gs.prog_active) {
+ if (!search_cache(brw, &key))
+ compile_gs_prog( brw, &key );
+ }
+}
+
+
+const struct brw_tracked_state brw_gs_prog = {
+ .dirty = {
+ .brw = BRW_NEW_PRIMITIVE,
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .update = upload_gs_prog
+};
diff --git a/src/mesa/pipe/i965simple/brw_gs.h b/src/mesa/pipe/i965simple/brw_gs.h
new file mode 100644
index 0000000000..f09141c6aa
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_gs.h
@@ -0,0 +1,75 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)
+
+struct brw_gs_prog_key {
+ unsigned attrs:32;
+ unsigned primitive:4;
+ unsigned hint_gs_always:1;
+ unsigned need_gs_prog:1;
+ unsigned pad:26;
+};
+
+struct brw_gs_compile {
+ struct brw_compile func;
+ struct brw_gs_prog_key key;
+ struct brw_gs_prog_data prog_data;
+
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_GS_VERTS];
+ } reg;
+
+ /* 3 different ways of expressing vertex size:
+ */
+ unsigned nr_attrs;
+ unsigned nr_regs;
+ unsigned nr_bytes;
+};
+
+#define ATTR_SIZE (4*4)
+
+void brw_gs_quads( struct brw_gs_compile *c );
+void brw_gs_quad_strip( struct brw_gs_compile *c );
+void brw_gs_tris( struct brw_gs_compile *c );
+void brw_gs_lines( struct brw_gs_compile *c );
+void brw_gs_points( struct brw_gs_compile *c );
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_gs_emit.c b/src/mesa/pipe/i965simple/brw_gs_emit.c
new file mode 100644
index 0000000000..c3cc90b10f
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_gs_emit.c
@@ -0,0 +1,148 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_gs.h"
+
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+ unsigned nr_verts )
+{
+ unsigned i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ c->prog_data.urb_read_length = c->nr_regs;
+ c->prog_data.total_grf = i;
+}
+
+
+static void brw_gs_emit_vue(struct brw_gs_compile *c,
+ struct brw_reg vert,
+ boolean last,
+ unsigned header)
+{
+ struct brw_compile *p = &c->func;
+ boolean allocate = !last;
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+ /* Send each vertex as a seperate write to the urb. This is
+ * different to the concept in brw_sf_emit.c, where subsequent
+ * writes are used to build up a single urb entry. Each of these
+ * writes instantiates a seperate urb entry, and a new one must be
+ * allocated each time.
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ 0,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response length */
+ allocate ? 0 : 1, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+void brw_gs_quads( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 4);
+
+ /* Use polygons for correct edgeflag behaviour. Note that vertex 3
+ * is the PV for quads, but vertex 0 for polygons:
+ */
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+
+void brw_gs_quad_strip( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 4);
+
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+
+void brw_gs_tris( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 3);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
+}
+
+void brw_gs_lines( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 2);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
+}
+
+void brw_gs_points( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
+}
+
+
+
+
+
+
+
+
diff --git a/src/mesa/pipe/i965simple/brw_gs_state.c b/src/mesa/pipe/i965simple/brw_gs_state.c
new file mode 100644
index 0000000000..8e62eb4bd7
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_gs_state.c
@@ -0,0 +1,88 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+
+static void upload_gs_unit( struct brw_context *brw )
+{
+ struct brw_gs_unit_state gs;
+
+ memset(&gs, 0, sizeof(gs));
+
+ /* CACHE_NEW_GS_PROG */
+ if (brw->gs.prog_active) {
+ gs.thread0.grf_reg_count =
+ ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1;
+ gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6;
+ gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
+ }
+ else {
+ gs.thread0.grf_reg_count = 0;
+ gs.thread0.kernel_start_pointer = 0;
+ gs.thread3.urb_entry_read_length = 1;
+ }
+
+ /* BRW_NEW_URB_FENCE */
+ gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries;
+ gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+
+ gs.thread4.max_threads = 0; /* Hardware requirement */
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ gs.thread4.stats_enable = 1;
+
+ /* CONSTANT */
+ gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ gs.thread1.single_program_flow = 1;
+ gs.thread3.dispatch_grf_start_reg = 1;
+ gs.thread3.const_urb_entry_read_offset = 0;
+ gs.thread3.const_urb_entry_read_length = 0;
+ gs.thread3.urb_entry_read_offset = 0;
+
+
+ brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs );
+}
+
+
+const struct brw_tracked_state brw_gs_unit = {
+ .dirty = {
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_GS_PROG
+ },
+ .update = upload_gs_unit
+};
diff --git a/src/mesa/pipe/i965simple/brw_metaops.c b/src/mesa/pipe/i965simple/brw_metaops.c
new file mode 100644
index 0000000000..09f5d28961
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_metaops.c
@@ -0,0 +1,538 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * frame buffer texture by Gary Wong <gtw@gnu.org>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+
+#if 0
+
+#define INIT(brw, STRUCT, ATTRIB) \
+do { \
+ brw->attribs.ATTRIB = &ctx->ATTRIB; \
+} while (0)
+
+#define DUP(brw, STRUCT, ATTRIB) \
+do { \
+ brw->metaops.attribs.ATTRIB = MALLOC_STRUCT(STRUCT); \
+ memcpy(brw->metaops.attribs.ATTRIB, \
+ brw->attribs.ATTRIB, \
+ sizeof(struct STRUCT)); \
+} while (0)
+
+
+#define INSTALL(brw, ATTRIB, STATE) \
+do { \
+ brw->attribs.ATTRIB = brw->metaops.attribs.ATTRIB; \
+ brw->state.dirty.mesa |= STATE; \
+} while (0)
+
+#define RESTORE(brw, ATTRIB, STATE) \
+do { \
+ brw->attribs.ATTRIB = &brw->intel.ctx.ATTRIB; \
+ brw->state.dirty.mesa |= STATE; \
+} while (0)
+
+static void init_attribs( struct brw_context *brw )
+{
+ DUP(brw, gl_colorbuffer_attrib, Color);
+ DUP(brw, gl_depthbuffer_attrib, Depth);
+ DUP(brw, gl_fog_attrib, Fog);
+ DUP(brw, gl_hint_attrib, Hint);
+ DUP(brw, gl_light_attrib, Light);
+ DUP(brw, gl_line_attrib, Line);
+ DUP(brw, gl_point_attrib, Point);
+ DUP(brw, gl_polygon_attrib, Polygon);
+ DUP(brw, gl_scissor_attrib, Scissor);
+ DUP(brw, gl_stencil_attrib, Stencil);
+ DUP(brw, gl_texture_attrib, Texture);
+ DUP(brw, gl_transform_attrib, Transform);
+ DUP(brw, gl_viewport_attrib, Viewport);
+ DUP(brw, gl_vertex_program_state, VertexProgram);
+ DUP(brw, gl_fragment_program_state, FragmentProgram);
+}
+
+static void install_attribs( struct brw_context *brw )
+{
+ INSTALL(brw, Color, _NEW_COLOR);
+ INSTALL(brw, Depth, _NEW_DEPTH);
+ INSTALL(brw, Fog, _NEW_FOG);
+ INSTALL(brw, Hint, _NEW_HINT);
+ INSTALL(brw, Light, _NEW_LIGHT);
+ INSTALL(brw, Line, _NEW_LINE);
+ INSTALL(brw, Point, _NEW_POINT);
+ INSTALL(brw, Polygon, _NEW_POLYGON);
+ INSTALL(brw, Scissor, _NEW_SCISSOR);
+ INSTALL(brw, Stencil, _NEW_STENCIL);
+ INSTALL(brw, Texture, _NEW_TEXTURE);
+ INSTALL(brw, Transform, _NEW_TRANSFORM);
+ INSTALL(brw, Viewport, _NEW_VIEWPORT);
+ INSTALL(brw, VertexProgram, _NEW_PROGRAM);
+ INSTALL(brw, FragmentProgram, _NEW_PROGRAM);
+}
+
+static void restore_attribs( struct brw_context *brw )
+{
+ RESTORE(brw, Color, _NEW_COLOR);
+ RESTORE(brw, Depth, _NEW_DEPTH);
+ RESTORE(brw, Fog, _NEW_FOG);
+ RESTORE(brw, Hint, _NEW_HINT);
+ RESTORE(brw, Light, _NEW_LIGHT);
+ RESTORE(brw, Line, _NEW_LINE);
+ RESTORE(brw, Point, _NEW_POINT);
+ RESTORE(brw, Polygon, _NEW_POLYGON);
+ RESTORE(brw, Scissor, _NEW_SCISSOR);
+ RESTORE(brw, Stencil, _NEW_STENCIL);
+ RESTORE(brw, Texture, _NEW_TEXTURE);
+ RESTORE(brw, Transform, _NEW_TRANSFORM);
+ RESTORE(brw, Viewport, _NEW_VIEWPORT);
+ RESTORE(brw, VertexProgram, _NEW_PROGRAM);
+ RESTORE(brw, FragmentProgram, _NEW_PROGRAM);
+}
+
+
+static const char *vp_prog =
+ "!!ARBvp1.0\n"
+ "MOV result.color, vertex.color;\n"
+ "MOV result.position, vertex.position;\n"
+ "END\n";
+
+static const char *fp_prog =
+ "!!ARBfp1.0\n"
+ "MOV result.color, fragment.color;\n"
+ "END\n";
+
+static const char *fp_tex_prog =
+ "!!ARBfp1.0\n"
+ "TEMP a;\n"
+ "ADD a, fragment.position, program.local[0];\n"
+ "MUL a, a, program.local[1];\n"
+ "TEX result.color, a, texture[0], 2D;\n"
+ "MOV result.depth.z, fragment.position;\n"
+ "END\n";
+
+/* Derived values of importance:
+ *
+ * FragmentProgram->_Current
+ * VertexProgram->_Enabled
+ * brw->vertex_program
+ * DrawBuffer->_ColorDrawBufferMask[0]
+ *
+ *
+ * More if drawpixels-through-texture is added.
+ */
+static void init_metaops_state( struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+
+ brw->metaops.vbo = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB);
+
+ ctx->Driver.BufferData(ctx,
+ GL_ARRAY_BUFFER_ARB,
+ 4096,
+ NULL,
+ GL_DYNAMIC_DRAW_ARB,
+ brw->metaops.vbo);
+
+ brw->metaops.fp = (struct gl_fragment_program *)
+ ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 1 );
+
+ brw->metaops.fp_tex = (struct gl_fragment_program *)
+ ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 1 );
+
+ brw->metaops.vp = (struct gl_vertex_program *)
+ ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 1 );
+
+ _mesa_parse_arb_fragment_program(ctx, GL_FRAGMENT_PROGRAM_ARB,
+ fp_prog, strlen(fp_prog),
+ brw->metaops.fp);
+
+ _mesa_parse_arb_fragment_program(ctx, GL_FRAGMENT_PROGRAM_ARB,
+ fp_tex_prog, strlen(fp_tex_prog),
+ brw->metaops.fp_tex);
+
+ _mesa_parse_arb_vertex_program(ctx, GL_VERTEX_PROGRAM_ARB,
+ vp_prog, strlen(vp_prog),
+ brw->metaops.vp);
+
+ brw->metaops.attribs.VertexProgram->Current = brw->metaops.vp;
+ brw->metaops.attribs.VertexProgram->_Enabled = TRUE;
+
+ brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp;
+}
+
+static void meta_flat_shade( struct intel_context *intel )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ brw->metaops.attribs.Raster->flatshade = TRUE;
+ brw->state.dirty.mesa |= _NEW_LIGHT;
+}
+
+
+static void meta_no_stencil_write( struct intel_context *intel )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ brw->metaops.attribs.Stencil->Enabled = FALSE;
+ brw->metaops.attribs.Stencil->WriteMask[0] = FALSE;
+ brw->state.dirty.mesa |= _NEW_STENCIL;
+}
+
+static void meta_no_depth_write( struct intel_context *intel )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ brw->metaops.attribs.Depth->Test = FALSE;
+ brw->metaops.attribs.Depth->Mask = FALSE;
+ brw->state.dirty.mesa |= _NEW_DEPTH;
+}
+
+
+static void meta_depth_replace( struct intel_context *intel )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, TRUE )
+ * ctx->Driver.DepthMask( ctx, TRUE )
+ */
+ brw->metaops.attribs.Depth->Test = TRUE;
+ brw->metaops.attribs.Depth->Mask = TRUE;
+ brw->state.dirty.mesa |= _NEW_DEPTH;
+
+ /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS )
+ */
+ brw->metaops.attribs.Depth->Func = GL_ALWAYS;
+
+ brw->state.dirty.mesa |= _NEW_DEPTH;
+}
+
+
+static void meta_stencil_replace( struct intel_context *intel,
+ unsigned s_mask,
+ unsigned s_clear)
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ brw->metaops.attribs.Stencil->Enabled = TRUE;
+ brw->metaops.attribs.Stencil->WriteMask[0] = s_mask;
+ brw->metaops.attribs.Stencil->ValueMask[0] = 0xff;
+ brw->metaops.attribs.Stencil->Ref[0] = s_clear;
+ brw->metaops.attribs.Stencil->Function[0] = GL_ALWAYS;
+ brw->metaops.attribs.Stencil->FailFunc[0] = GL_REPLACE;
+ brw->metaops.attribs.Stencil->ZPassFunc[0] = GL_REPLACE;
+ brw->metaops.attribs.Stencil->ZFailFunc[0] = GL_REPLACE;
+ brw->state.dirty.mesa |= _NEW_STENCIL;
+}
+
+
+static void meta_color_mask( struct intel_context *intel, boolean state )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ if (state)
+ COPY_4V(brw->metaops.attribs.Color->ColorMask,
+ brw->intel.ctx.Color.ColorMask);
+ else
+ ASSIGN_4V(brw->metaops.attribs.Color->ColorMask, 0, 0, 0, 0);
+
+ brw->state.dirty.mesa |= _NEW_COLOR;
+}
+
+static void meta_no_texture( struct intel_context *intel )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp;
+
+ brw->metaops.attribs.Texture->CurrentUnit = 0;
+ brw->metaops.attribs.Texture->_EnabledUnits = 0;
+ brw->metaops.attribs.Texture->_EnabledCoordUnits = 0;
+ brw->metaops.attribs.Texture->Unit[ 0 ].Enabled = 0;
+ brw->metaops.attribs.Texture->Unit[ 0 ]._ReallyEnabled = 0;
+
+ brw->state.dirty.mesa |= _NEW_TEXTURE | _NEW_PROGRAM;
+}
+
+static void meta_texture_blend_replace(struct intel_context *intel)
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ brw->metaops.attribs.Texture->CurrentUnit = 0;
+ brw->metaops.attribs.Texture->_EnabledUnits = 1;
+ brw->metaops.attribs.Texture->_EnabledCoordUnits = 1;
+ brw->metaops.attribs.Texture->Unit[ 0 ].Enabled = TEXTURE_2D_BIT;
+ brw->metaops.attribs.Texture->Unit[ 0 ]._ReallyEnabled = TEXTURE_2D_BIT;
+ brw->metaops.attribs.Texture->Unit[ 0 ].Current2D =
+ intel->frame_buffer_texobj;
+ brw->metaops.attribs.Texture->Unit[ 0 ]._Current =
+ intel->frame_buffer_texobj;
+
+ brw->state.dirty.mesa |= _NEW_TEXTURE | _NEW_PROGRAM;
+}
+
+static void meta_import_pixel_state(struct intel_context *intel)
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ RESTORE(brw, Color, _NEW_COLOR);
+ RESTORE(brw, Depth, _NEW_DEPTH);
+ RESTORE(brw, Fog, _NEW_FOG);
+ RESTORE(brw, Scissor, _NEW_SCISSOR);
+ RESTORE(brw, Stencil, _NEW_STENCIL);
+ RESTORE(brw, Texture, _NEW_TEXTURE);
+ RESTORE(brw, FragmentProgram, _NEW_PROGRAM);
+}
+
+static void meta_frame_buffer_texture( struct intel_context *intel,
+ int xoff, int yoff )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+ struct intel_region *region = intel_drawbuf_region( intel );
+
+ INSTALL(brw, FragmentProgram, _NEW_PROGRAM);
+
+ brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp_tex;
+ /* This is unfortunate, but seems to be necessary, since later on we
+ will end up calling _mesa_load_state_parameters to lookup the
+ local params (below), and that will want to look in ctx.FragmentProgram
+ instead of brw->attribs.FragmentProgram. */
+ intel->ctx.FragmentProgram.Current = brw->metaops.fp_tex;
+
+ brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 0 ] = xoff;
+ brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 1 ] = yoff;
+ brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 2 ] = 0.0;
+ brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 3 ] = 0.0;
+ brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 0 ] =
+ 1.0 / region->pitch;
+ brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 1 ] =
+ -1.0 / region->height;
+ brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 2 ] = 0.0;
+ brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 3 ] = 1.0;
+
+ brw->state.dirty.mesa |= _NEW_PROGRAM;
+}
+
+
+static void meta_draw_region( struct intel_context *intel,
+ struct intel_region *draw_region,
+ struct intel_region *depth_region )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ if (!brw->metaops.saved_draw_region) {
+ brw->metaops.saved_draw_region = brw->state.draw_region;
+ brw->metaops.saved_depth_region = brw->state.depth_region;
+ }
+
+ brw->state.draw_region = draw_region;
+ brw->state.depth_region = depth_region;
+
+ brw->state.dirty.mesa |= _NEW_BUFFERS;
+}
+
+
+static void meta_draw_quad(struct intel_context *intel,
+ float x0, float x1,
+ float y0, float y1,
+ float z,
+ ubyte red, ubyte green,
+ ubyte blue, ubyte alpha,
+ float s0, float s1,
+ float t0, float t1)
+{
+ GLcontext *ctx = &intel->ctx;
+ struct brw_context *brw = brw_context(&intel->ctx);
+ struct gl_client_array pos_array;
+ struct gl_client_array color_array;
+ struct gl_client_array *attribs[PIPE_ATTRIB_MAX];
+ struct _mesa_prim prim[1];
+ float pos[4][3];
+ ubyte color[4];
+
+ ctx->Driver.BufferData(ctx,
+ GL_ARRAY_BUFFER_ARB,
+ sizeof(pos) + sizeof(color),
+ NULL,
+ GL_DYNAMIC_DRAW_ARB,
+ brw->metaops.vbo);
+
+ pos[0][0] = x0;
+ pos[0][1] = y0;
+ pos[0][2] = z;
+
+ pos[1][0] = x1;
+ pos[1][1] = y0;
+ pos[1][2] = z;
+
+ pos[2][0] = x1;
+ pos[2][1] = y1;
+ pos[2][2] = z;
+
+ pos[3][0] = x0;
+ pos[3][1] = y1;
+ pos[3][2] = z;
+
+
+ ctx->Driver.BufferSubData(ctx,
+ GL_ARRAY_BUFFER_ARB,
+ 0,
+ sizeof(pos),
+ pos,
+ brw->metaops.vbo);
+
+ color[0] = red;
+ color[1] = green;
+ color[2] = blue;
+ color[3] = alpha;
+
+ ctx->Driver.BufferSubData(ctx,
+ GL_ARRAY_BUFFER_ARB,
+ sizeof(pos),
+ sizeof(color),
+ color,
+ brw->metaops.vbo);
+
+ /* Ignoring texture coords.
+ */
+
+ memset(attribs, 0, PIPE_ATTRIB_MAX * sizeof(*attribs));
+
+ attribs[VERT_ATTRIB_POS] = &pos_array;
+ attribs[VERT_ATTRIB_POS]->Ptr = 0;
+ attribs[VERT_ATTRIB_POS]->Type = GL_FLOAT;
+ attribs[VERT_ATTRIB_POS]->Enabled = 1;
+ attribs[VERT_ATTRIB_POS]->Size = 3;
+ attribs[VERT_ATTRIB_POS]->StrideB = 3 * sizeof(float);
+ attribs[VERT_ATTRIB_POS]->Stride = 3 * sizeof(float);
+ attribs[VERT_ATTRIB_POS]->_MaxElement = 4;
+ attribs[VERT_ATTRIB_POS]->Normalized = 0;
+ attribs[VERT_ATTRIB_POS]->BufferObj = brw->metaops.vbo;
+
+ attribs[VERT_ATTRIB_COLOR0] = &color_array;
+ attribs[VERT_ATTRIB_COLOR0]->Ptr = (const ubyte *)sizeof(pos);
+ attribs[VERT_ATTRIB_COLOR0]->Type = GL_UNSIGNED_BYTE;
+ attribs[VERT_ATTRIB_COLOR0]->Enabled = 1;
+ attribs[VERT_ATTRIB_COLOR0]->Size = 4;
+ attribs[VERT_ATTRIB_COLOR0]->StrideB = 0;
+ attribs[VERT_ATTRIB_COLOR0]->Stride = 0;
+ attribs[VERT_ATTRIB_COLOR0]->_MaxElement = 1;
+ attribs[VERT_ATTRIB_COLOR0]->Normalized = 1;
+ attribs[VERT_ATTRIB_COLOR0]->BufferObj = brw->metaops.vbo;
+
+ /* Just ignoring texture coordinates for now.
+ */
+
+ memset(prim, 0, sizeof(*prim));
+
+ prim[0].mode = GL_TRIANGLE_FAN;
+ prim[0].begin = 1;
+ prim[0].end = 1;
+ prim[0].weak = 0;
+ prim[0].pad = 0;
+ prim[0].start = 0;
+ prim[0].count = 4;
+
+ brw_draw_prims(&brw->intel.ctx,
+ (const struct gl_client_array **)attribs,
+ prim, 1,
+ NULL,
+ 0,
+ 3 );
+}
+
+
+static void install_meta_state( struct intel_context *intel )
+{
+ GLcontext *ctx = &intel->ctx;
+ struct brw_context *brw = brw_context(ctx);
+
+ if (!brw->metaops.vbo) {
+ init_metaops_state(brw);
+ }
+
+ install_attribs(brw);
+
+ meta_no_texture(&brw->intel);
+ meta_flat_shade(&brw->intel);
+ brw->metaops.restore_draw_mask = ctx->DrawBuffer->_ColorDrawBufferMask[0];
+ brw->metaops.restore_fp = ctx->FragmentProgram.Current;
+
+ /* This works without adjusting refcounts. Fix later?
+ */
+ brw->metaops.saved_draw_region = brw->state.draw_region;
+ brw->metaops.saved_depth_region = brw->state.depth_region;
+ brw->metaops.active = 1;
+
+ brw->state.dirty.brw |= BRW_NEW_METAOPS;
+}
+
+static void leave_meta_state( struct intel_context *intel )
+{
+ GLcontext *ctx = &intel->ctx;
+ struct brw_context *brw = brw_context(ctx);
+
+ restore_attribs(brw);
+
+ ctx->DrawBuffer->_ColorDrawBufferMask[0] = brw->metaops.restore_draw_mask;
+ ctx->FragmentProgram.Current = brw->metaops.restore_fp;
+
+ brw->state.draw_region = brw->metaops.saved_draw_region;
+ brw->state.depth_region = brw->metaops.saved_depth_region;
+ brw->metaops.saved_draw_region = NULL;
+ brw->metaops.saved_depth_region = NULL;
+ brw->metaops.active = 0;
+
+ brw->state.dirty.mesa |= _NEW_BUFFERS;
+ brw->state.dirty.brw |= BRW_NEW_METAOPS;
+}
+
+
+
+void brw_init_metaops( struct brw_context *brw )
+{
+ init_attribs(brw);
+}
+
+void brw_destroy_metaops( struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+
+ if (brw->metaops.vbo)
+ ctx->Driver.DeleteBuffer( ctx, brw->metaops.vbo );
+
+/* ctx->Driver.DeleteProgram( ctx, brw->metaops.fp ); */
+/* ctx->Driver.DeleteProgram( ctx, brw->metaops.fp_tex ); */
+/* ctx->Driver.DeleteProgram( ctx, brw->metaops.vp ); */
+}
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_misc_state.c b/src/mesa/pipe/i965simple/brw_misc_state.c
new file mode 100644
index 0000000000..0750502334
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_misc_state.c
@@ -0,0 +1,425 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_batch.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ */
+
+void brw_upload_blend_constant_color(struct brw_context *brw)
+{
+ struct brw_blend_constant_color bcc;
+
+ memset(&bcc, 0, sizeof(bcc));
+ bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+ bcc.header.length = sizeof(bcc)/4-2;
+ bcc.blend_constant_color[0] = brw->attribs.BlendColor.color[0];
+ bcc.blend_constant_color[1] = brw->attribs.BlendColor.color[1];
+ bcc.blend_constant_color[2] = brw->attribs.BlendColor.color[2];
+ bcc.blend_constant_color[3] = brw->attribs.BlendColor.color[3];
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+}
+
+
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is the BRW_SS_POOL cache buffer.
+ */
+static void upload_binding_table_pointers(struct brw_context *brw)
+{
+ struct brw_binding_table_pointers btp;
+ memset(&btp, 0, sizeof(btp));
+
+ btp.header.opcode = CMD_BINDING_TABLE_PTRS;
+ btp.header.length = sizeof(btp)/4 - 2;
+ btp.vs = 0;
+ btp.gs = 0;
+ btp.clp = 0;
+ btp.sf = 0;
+ btp.wm = brw->wm.bind_ss_offset;
+
+ BRW_CACHED_BATCH_STRUCT(brw, &btp);
+}
+
+const struct brw_tracked_state brw_binding_table_pointers = {
+ .dirty = {
+ .brw = 0,
+ .cache = CACHE_NEW_SURF_BIND
+ },
+ .update = upload_binding_table_pointers,
+};
+
+
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer.
+ */
+static void upload_pipelined_state_pointers(struct brw_context *brw )
+{
+ struct brw_pipelined_state_pointers psp;
+ memset(&psp, 0, sizeof(psp));
+
+ psp.header.opcode = CMD_PIPELINED_STATE_POINTERS;
+ psp.header.length = sizeof(psp)/4 - 2;
+
+ psp.vs.offset = brw->vs.state_gs_offset >> 5;
+ psp.sf.offset = brw->sf.state_gs_offset >> 5;
+ psp.wm.offset = brw->wm.state_gs_offset >> 5;
+ psp.cc.offset = brw->cc.state_gs_offset >> 5;
+
+ /* GS gets turned on and off regularly. Need to re-emit URB fence
+ * after this occurs.
+ */
+ if (brw->gs.prog_active) {
+ psp.gs.offset = brw->gs.state_gs_offset >> 5;
+ psp.gs.enable = 1;
+ }
+
+ {
+ psp.clp.offset = brw->clip.state_gs_offset >> 5;
+ psp.clp.enable = 1;
+ }
+
+
+ if (BRW_CACHED_BATCH_STRUCT(brw, &psp))
+ brw->state.dirty.brw |= BRW_NEW_PSP;
+}
+
+const struct brw_tracked_state brw_pipelined_state_pointers = {
+ .dirty = {
+ .brw = 0,
+ .cache = (CACHE_NEW_VS_UNIT |
+ CACHE_NEW_GS_UNIT |
+ CACHE_NEW_GS_PROG |
+ CACHE_NEW_CLIP_UNIT |
+ CACHE_NEW_SF_UNIT |
+ CACHE_NEW_WM_UNIT |
+ CACHE_NEW_CC_UNIT)
+ },
+ .update = upload_pipelined_state_pointers
+};
+
+static void upload_psp_urb_cbs(struct brw_context *brw )
+{
+ upload_pipelined_state_pointers(brw);
+ brw_upload_urb_fence(brw);
+ brw_upload_constant_buffer_state(brw);
+}
+
+
+const struct brw_tracked_state brw_psp_urb_cbs = {
+ .dirty = {
+ .brw = BRW_NEW_URB_FENCE,
+ .cache = (CACHE_NEW_VS_UNIT |
+ CACHE_NEW_GS_UNIT |
+ CACHE_NEW_GS_PROG |
+ CACHE_NEW_CLIP_UNIT |
+ CACHE_NEW_SF_UNIT |
+ CACHE_NEW_WM_UNIT |
+ CACHE_NEW_CC_UNIT)
+ },
+ .update = upload_psp_urb_cbs
+};
+
+/**
+ * Upload the depthbuffer offset and format.
+ *
+ * We have to do this per state validation as we need to emit the relocation
+ * in the batch buffer.
+ */
+static void upload_depthbuffer(struct brw_context *brw)
+{
+ struct pipe_surface *depth_surface = brw->attribs.FrameBuffer.zbuf;
+
+ BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2));
+ if (depth_surface == NULL) {
+ OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+ (BRW_SURFACE_NULL << 29));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else {
+ unsigned int format;
+
+ switch (depth_surface->cpp) {
+ case 2:
+ format = BRW_DEPTHFORMAT_D16_UNORM;
+ break;
+ case 4:
+ if (depth_surface->format == PIPE_FORMAT_Z32_FLOAT)
+ format = BRW_DEPTHFORMAT_D32_FLOAT;
+ else
+ format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ OUT_BATCH(((depth_surface->pitch * depth_surface->cpp) - 1) |
+ (format << 18) |
+ (BRW_TILEWALK_YMAJOR << 26) |
+#if 0
+ (depth_surface->region->tiled << 27) |
+#endif
+ (BRW_SURFACE_2D << 29));
+ OUT_RELOC(depth_surface->buffer,
+ PIPE_BUFFER_FLAG_READ | PIPE_BUFFER_FLAG_WRITE, 0);
+ OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+ ((depth_surface->pitch - 1) << 6) |
+ ((depth_surface->height - 1) << 19));
+ OUT_BATCH(0);
+ }
+ ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_depthbuffer = {
+ .dirty = {
+ .brw = 0,
+ .cache = 0
+ },
+ .update = upload_depthbuffer,
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple offset packet
+ */
+
+static void upload_polygon_stipple_offset(struct brw_context *brw)
+{
+ struct brw_polygon_stipple_offset bpso;
+
+ memset(&bpso, 0, sizeof(bpso));
+ bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+ bpso.header.length = sizeof(bpso)/4-2;
+
+ bpso.bits0.x_offset = 0;
+ bpso.bits0.y_offset = 0;
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+}
+
+/***********************************************************************
+ * Polygon stipple packet
+ */
+
+void brw_upload_polygon_stipple(struct brw_context *brw)
+{
+ struct brw_polygon_stipple bps;
+ unsigned i;
+
+ /*update the offset at the same time it will always be 0*/
+ upload_polygon_stipple_offset(brw);
+ memset(&bps, 0, sizeof(bps));
+ bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
+ bps.header.length = sizeof(bps)/4-2;
+
+ for (i = 0; i < 32; i++)
+ bps.stipple[i] = brw->attribs.PolygonStipple->stipple[31 - i]; /* invert */
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bps);
+}
+
+/***********************************************************************
+ * Line stipple packet
+ */
+
+void brw_upload_line_stipple(struct brw_context *brw)
+{
+ struct brw_line_stipple bls;
+ float tmp;
+ int tmpi;
+
+ memset(&bls, 0, sizeof(bls));
+ bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
+ bls.header.length = sizeof(bls)/4 - 2;
+
+ bls.bits0.pattern = brw->attribs.Raster->line_stipple_pattern;
+ bls.bits1.repeat_count = brw->attribs.Raster->line_stipple_factor;
+
+ tmp = 1.0 / (float) brw->attribs.Raster->line_stipple_factor;
+ tmpi = tmp * (1<<13);
+
+
+ bls.bits1.inverse_repeat_count = tmpi;
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bls);
+}
+
+
+/***********************************************************************
+ * Misc constant state packets
+ */
+
+static void upload_pipe_control(struct brw_context *brw)
+{
+ struct brw_pipe_control pc;
+
+ return;
+
+ memset(&pc, 0, sizeof(pc));
+
+ pc.header.opcode = CMD_PIPE_CONTROL;
+ pc.header.length = sizeof(pc)/4 - 2;
+ pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE;
+
+ pc.header.instruction_state_cache_flush_enable = 1;
+
+ pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL;
+
+ BRW_BATCH_STRUCT(brw, &pc);
+}
+
+const struct brw_tracked_state brw_pipe_control = {
+ .dirty = {
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .update = upload_pipe_control
+};
+
+
+/***********************************************************************
+ * Misc invarient state packets
+ */
+
+static void upload_invarient_state( struct brw_context *brw )
+{
+ {
+ /* 0x61040000 Pipeline Select */
+ /* PipelineSelect : 0 */
+ struct brw_pipeline_select ps;
+
+ memset(&ps, 0, sizeof(ps));
+ ps.header.opcode = CMD_PIPELINE_SELECT;
+ ps.header.pipeline_select = 0;
+ BRW_BATCH_STRUCT(brw, &ps);
+ }
+
+ {
+ struct brw_global_depth_offset_clamp gdo;
+ memset(&gdo, 0, sizeof(gdo));
+
+ /* Disable depth offset clamping.
+ */
+ gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+ gdo.header.length = sizeof(gdo)/4 - 2;
+ gdo.depth_offset_clamp = 0.0;
+
+ BRW_BATCH_STRUCT(brw, &gdo);
+ }
+
+
+ /* 0x61020000 State Instruction Pointer */
+ {
+ struct brw_system_instruction_pointer sip;
+ memset(&sip, 0, sizeof(sip));
+
+ sip.header.opcode = CMD_STATE_INSN_POINTER;
+ sip.header.length = 0;
+ sip.bits0.pad = 0;
+ sip.bits0.system_instruction_pointer = 0;
+ BRW_BATCH_STRUCT(brw, &sip);
+ }
+
+
+ {
+ struct brw_vf_statistics vfs;
+ memset(&vfs, 0, sizeof(vfs));
+
+ vfs.opcode = CMD_VF_STATISTICS;
+ if (BRW_DEBUG & DEBUG_STATS)
+ vfs.statistics_enable = 1;
+
+ BRW_BATCH_STRUCT(brw, &vfs);
+ }
+}
+
+const struct brw_tracked_state brw_invarient_state = {
+ .dirty = {
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .update = upload_invarient_state
+};
+
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools. This comes at the expense of memory, and more expensive cache
+ * misses.
+ */
+static void upload_state_base_address( struct brw_context *brw )
+{
+ /* Output the structure (brw_state_base_address) directly to the
+ * batchbuffer, so we can emit relocations inline.
+ */
+ BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+ OUT_RELOC(brw->pool[BRW_GS_POOL].buffer,
+ PIPE_BUFFER_FLAG_READ,
+ 1); /* General state base address */
+ OUT_RELOC(brw->pool[BRW_SS_POOL].buffer,
+ PIPE_BUFFER_FLAG_READ,
+ 1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ ADVANCE_BATCH();
+}
+
+
+const struct brw_tracked_state brw_state_base_address = {
+ .dirty = {
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .update = upload_state_base_address
+};
diff --git a/src/mesa/pipe/i965simple/brw_program.c b/src/mesa/pipe/i965simple/brw_program.c
new file mode 100644
index 0000000000..c4640a2b06
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_program.c
@@ -0,0 +1,130 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_context.h"
+#include "brw_util.h"
+
+#if 0
+static void brwBindProgram( GLcontext *ctx,
+ int target,
+ struct gl_program *prog )
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+ break;
+ }
+}
+
+static struct gl_program *brwNewProgram( GLcontext *ctx,
+ int target,
+ unsigned id )
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB: {
+ struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
+ if (prog) {
+ prog->id = brw->program_id++;
+
+ return _mesa_init_vertex_program( ctx, &prog->program,
+ target, id );
+ }
+ else
+ return NULL;
+ }
+
+ case GL_FRAGMENT_PROGRAM_ARB: {
+ struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
+ if (prog) {
+ prog->id = brw->program_id++;
+
+ return _mesa_init_fragment_program( ctx, &prog->program,
+ target, id );
+ }
+ else
+ return NULL;
+ }
+
+ default:
+ return _mesa_new_program(ctx, target, id);
+ }
+}
+
+static void brwDeleteProgram( GLcontext *ctx,
+ struct gl_program *prog )
+{
+
+ _mesa_delete_program( ctx, prog );
+}
+
+
+static boolean brwIsProgramNative( GLcontext *ctx,
+ int target,
+ struct gl_program *prog )
+{
+ return TRUE;
+}
+
+static void brwProgramStringNotify( GLcontext *ctx,
+ int target,
+ struct gl_program *prog )
+{
+ if (target == GL_FRAGMENT_PROGRAM_ARB) {
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_fragment_program *p = (struct brw_fragment_program *)prog;
+ struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
+ if (p == fp)
+ brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+ p->id = brw->program_id++;
+ p->param_state = p->program.Base.Parameters->StateFlags;
+ }
+ else if (target == GL_VERTEX_PROGRAM_ARB) {
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_vertex_program *p = (struct brw_vertex_program *)prog;
+ struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
+ if (p == vp)
+ brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+ p->id = brw->program_id++;
+ p->param_state = p->program.Base.Parameters->StateFlags;
+
+ /* Also tell tnl about it:
+ */
+ _tnl_program_string(ctx, target, prog);
+ }
+}
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_reg.h b/src/mesa/pipe/i965simple/brw_reg.h
new file mode 100644
index 0000000000..9e885c3b3b
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_reg.h
@@ -0,0 +1,76 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#define CMD_MI (0x0 << 29)
+#define CMD_2D (0x2 << 29)
+#define CMD_3D (0x3 << 29)
+
+#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
+
+/* Stalls command execution waiting for the given events to have occurred. */
+#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23))
+#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
+#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
+
+/* Primitive dispatch on 830-945 */
+#define _3DPRIMITIVE (CMD_3D | (0x1f << 24))
+#define PRIM_INDIRECT (1<<23)
+#define PRIM_INLINE (0<<23)
+#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
+#define PRIM_INDIRECT_ELTS (1<<17)
+
+#define PRIM3D_TRILIST (0x0<<18)
+#define PRIM3D_TRISTRIP (0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE (0x2<<18)
+#define PRIM3D_TRIFAN (0x3<<18)
+#define PRIM3D_POLY (0x4<<18)
+#define PRIM3D_LINELIST (0x5<<18)
+#define PRIM3D_LINESTRIP (0x6<<18)
+#define PRIM3D_RECTLIST (0x7<<18)
+#define PRIM3D_POINTLIST (0x8<<18)
+#define PRIM3D_DIB (0x9<<18)
+#define PRIM3D_MASK (0x1f<<18)
+
+#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6)
+
+#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4)
+
+#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6)
+
+/* BR00 */
+#define XY_BLT_WRITE_ALPHA (1 << 21)
+#define XY_BLT_WRITE_RGB (1 << 20)
+#define XY_SRC_TILED (1 << 15)
+#define XY_DST_TILED (1 << 11)
+
+/* BR13 */
+#define BR13_565 (0x1 << 24)
+#define BR13_8888 (0x3 << 24)
+
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
diff --git a/src/mesa/pipe/softpipe/sp_state_feedback.c b/src/mesa/pipe/i965simple/brw_regions.c
index 02aaf34e75..32ad988279 100644
--- a/src/mesa/pipe/softpipe/sp_state_feedback.c
+++ b/src/mesa/pipe/i965simple/brw_regions.c
@@ -1,5 +1,5 @@
/**************************************************************************
- *
+ *
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,51 +22,18 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
-/**
- * Authors:
- * Brian Paul
+/* Provide additional functionality on top of bufmgr buffers:
+ * - 2d semantics and blit operations (XXX: remove/simplify blits??)
+ * - refcounting of buffers for multiple images in a buffer.
+ * - refcounting of buffer mappings.
*/
-
-#include "sp_context.h"
-#include "sp_state.h"
-#include "sp_surface.h"
-
+#include "pipe/p_defines.h"
#include "pipe/p_winsys.h"
-#include "pipe/draw/draw_context.h"
-
-
-void
-softpipe_set_feedback_state(struct pipe_context *pipe,
- const struct pipe_feedback_state *feedback)
-{
- struct softpipe_context *softpipe = softpipe_context(pipe);
-
- softpipe->feedback = *feedback; /* struct copy */
- /*
- softpipe->dirty |= SP_NEW_FEEDBACK;
- */
-
- draw_set_feedback_state(softpipe->draw, feedback);
-}
-
-
-void
-softpipe_set_feedback_buffer(struct pipe_context *pipe,
- unsigned index,
- const struct pipe_feedback_buffer *feedback)
-{
- struct softpipe_context *softpipe = softpipe_context(pipe);
+#include "brw_context.h"
+#include "brw_blit.h"
- assert(index < PIPE_MAX_FEEDBACK_ATTRIBS);
- /* Need to be careful with referencing */
- pipe->winsys->buffer_reference(pipe->winsys,
- &softpipe->feedback_buffer[index].buffer,
- feedback->buffer);
- softpipe->feedback_buffer[index].size = feedback->size;
- softpipe->feedback_buffer[index].start_offset = feedback->start_offset;
-}
diff --git a/src/mesa/pipe/i965simple/brw_regions.h b/src/mesa/pipe/i965simple/brw_regions.h
new file mode 100644
index 0000000000..1eebd1d5ee
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_regions.h
@@ -0,0 +1,5 @@
+#ifndef BRW_REGIONS_H
+#define BRW_REGIONS_H
+
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_sf.c b/src/mesa/pipe/i965simple/brw_sf.c
new file mode 100644
index 0000000000..f009ff37d9
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_sf.c
@@ -0,0 +1,194 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+
+#if 0
+#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
+
+static void compile_sf_prog( struct brw_context *brw,
+ struct brw_sf_prog_key *key )
+{
+ struct brw_sf_compile c;
+ const unsigned *program;
+ unsigned program_size;
+ unsigned i, idx;
+
+ memset(&c, 0, sizeof(c));
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(&c.func);
+
+ c.key = *key;
+ c.nr_attrs = brw_count_bits(c.key.attrs);
+ c.nr_attr_regs = (c.nr_attrs+1)/2;
+ c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+ c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
+
+ c.prog_data.urb_read_length = c.nr_attr_regs;
+ c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+ /* Construct map from attribute number to position in the vertex.
+ */
+ for (i = idx = 0; i < VERT_RESULT_MAX; i++)
+ if (c.key.attrs & (1<<i)) {
+ c.attr_to_idx[i] = idx;
+ c.idx_to_attr[idx] = i;
+ if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
+ c.point_attrs[i].CoordReplace =
+ brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0];
+ } else
+ c.point_attrs[i].CoordReplace = FALSE;
+ idx++;
+ }
+
+ /* Which primitive? Or all three?
+ */
+ switch (key->primitive) {
+ case SF_TRIANGLES:
+ c.nr_verts = 3;
+ brw_emit_tri_setup( &c );
+ break;
+ case SF_LINES:
+ c.nr_verts = 2;
+ brw_emit_line_setup( &c );
+ break;
+ case SF_POINTS:
+ c.nr_verts = 1;
+ if (key->do_point_sprite)
+ brw_emit_point_sprite_setup( &c );
+ else
+ brw_emit_point_setup( &c );
+ break;
+ case SF_UNFILLED_TRIS:
+ c.nr_verts = 3;
+ brw_emit_anyprim_setup( &c );
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /* Upload
+ */
+ brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG],
+ &c.key,
+ sizeof(c.key),
+ program,
+ program_size,
+ &c.prog_data,
+ &brw->sf.prog_data );
+}
+
+
+static boolean search_cache( struct brw_context *brw,
+ struct brw_sf_prog_key *key )
+{
+ return brw_search_cache(&brw->cache[BRW_SF_PROG],
+ key, sizeof(*key),
+ &brw->sf.prog_data,
+ &brw->sf.prog_gs_offset);
+}
+
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_sf_prog( struct brw_context *brw )
+{
+ struct brw_sf_prog_key key;
+
+ memset(&key, 0, sizeof(key));
+
+ /* Populate the key, noting state dependencies:
+ */
+ /* CACHE_NEW_VS_PROG */
+ key.attrs = brw->vs.prog_data->outputs_written;
+
+ /* BRW_NEW_REDUCED_PRIMITIVE */
+ switch (brw->reduced_primitive) {
+ case PIPE_PRIM_TRIANGLES:
+ /* NOTE: We just use the edgeflag attribute as an indicator that
+ * unfilled triangles are active. We don't actually do the
+ * edgeflag testing here, it is already done in the clip
+ * program.
+ */
+ if (key.attrs & (1<<VERT_RESULT_EDGE))
+ key.primitive = SF_UNFILLED_TRIS;
+ else
+ key.primitive = SF_TRIANGLES;
+ break;
+ case PIPE_PRIM_LINES:
+ key.primitive = SF_LINES;
+ break;
+ case PIPE_PRIM_POINTS:
+ key.primitive = SF_POINTS;
+ break;
+ }
+
+ /* BRW_NEW_POINT */
+ key.do_point_sprite = brw->attribs.Point->PointSprite;
+ key.SpriteOrigin = brw->attribs.Point->SpriteOrigin;
+ /* BRW_NEW_RASTER */
+ key.do_flat_shading = (brw->attribs.Raster->flatshade);
+ key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
+
+ /* _NEW_POLYGON */
+ if (key.do_twoside_color)
+ key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
+
+
+ if (!search_cache(brw, &key))
+ compile_sf_prog( brw, &key );
+}
+
+
+const struct brw_tracked_state brw_sf_prog = {
+ .dirty = {
+ .brw = (BRW_NEW_RASTER |
+ BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .update = upload_sf_prog
+};
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_sf.h b/src/mesa/pipe/i965simple/brw_sf.h
new file mode 100644
index 0000000000..d04388325d
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_sf.h
@@ -0,0 +1,111 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+#define SF_POINTS 0
+#define SF_LINES 1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS 3
+
+struct brw_sf_prog_key {
+ unsigned attrs:32;
+ unsigned primitive:2;
+ unsigned do_twoside_color:1;
+ unsigned do_flat_shading:1;
+ unsigned frontface_ccw:1;
+ unsigned do_point_sprite:1;
+ unsigned pad:10;
+ int SpriteOrigin;
+};
+
+struct brw_sf_point_tex {
+ boolean CoordReplace;
+};
+
+struct brw_sf_compile {
+ struct brw_compile func;
+ struct brw_sf_prog_key key;
+ struct brw_sf_prog_data prog_data;
+
+ struct brw_reg pv;
+ struct brw_reg det;
+ struct brw_reg dx0;
+ struct brw_reg dx2;
+ struct brw_reg dy0;
+ struct brw_reg dy2;
+
+ /* z and 1/w passed in seperately:
+ */
+ struct brw_reg z[3];
+ struct brw_reg inv_w[3];
+
+ /* The vertices:
+ */
+ struct brw_reg vert[3];
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ struct brw_reg inv_det;
+ struct brw_reg a1_sub_a0;
+ struct brw_reg a2_sub_a0;
+ struct brw_reg tmp;
+
+ struct brw_reg m1Cx;
+ struct brw_reg m2Cy;
+ struct brw_reg m3C0;
+
+ unsigned nr_verts;
+ unsigned nr_attrs;
+ unsigned nr_attr_regs;
+ unsigned nr_setup_attrs;
+ unsigned nr_setup_regs;
+#if 0
+ ubyte attr_to_idx[VERT_RESULT_MAX];
+ ubyte idx_to_attr[VERT_RESULT_MAX];
+ struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
+#endif
+};
+
+
+void brw_emit_tri_setup( struct brw_sf_compile *c );
+void brw_emit_line_setup( struct brw_sf_compile *c );
+void brw_emit_point_setup( struct brw_sf_compile *c );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_sf_emit.c b/src/mesa/pipe/i965simple/brw_sf_emit.c
new file mode 100644
index 0000000000..93f23171f2
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_sf_emit.c
@@ -0,0 +1,696 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+#if 0
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+ struct brw_reg vert,
+ unsigned attr)
+{
+ unsigned off = c->attr_to_idx[attr] / 2;
+ unsigned sub = c->attr_to_idx[attr] % 2;
+
+ return brw_vec4_grf(vert.nr + off, sub * 4);
+}
+
+static boolean have_attr(struct brw_sf_compile *c,
+ unsigned attr)
+{
+ return (c->key.attrs & (1<<attr)) ? 1 : 0;
+}
+
+
+
+/***********************************************************************
+ * Twoside lighting
+ */
+static void copy_bfc( struct brw_sf_compile *c,
+ struct brw_reg vert )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+ for (i = 0; i < 2; i++) {
+ if (have_attr(c, VERT_RESULT_COL0+i) &&
+ have_attr(c, VERT_RESULT_BFC0+i))
+ brw_MOV(p,
+ get_vert_attr(c, vert, VERT_RESULT_COL0+i),
+ get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
+ }
+}
+
+
+static void do_twoside_color( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+ unsigned backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ /* XXX: What happens if BFC isn't present? This could only happen
+ * for user-supplied vertex programs, as t_vp_build.c always does
+ * the right thing.
+ */
+ if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
+ !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
+ return;
+
+ /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
+ * to get all channels active inside the IF. In the clipping code
+ * we run with NoMask, so it's not an option and we can use
+ * BRW_EXECUTE_1 for all comparisions.
+ */
+ brw_push_insn_state(p);
+ brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+ if_insn = brw_IF(p, BRW_EXECUTE_4);
+ {
+ switch (c->nr_verts) {
+ case 3: copy_bfc(c, c->vert[2]);
+ case 2: copy_bfc(c, c->vert[1]);
+ case 1: copy_bfc(c, c->vert[0]);
+ }
+ }
+ brw_ENDIF(p, if_insn);
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+
+#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
+ (1<<VERT_RESULT_COL1))
+
+static void copy_colors( struct brw_sf_compile *c,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+ for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
+ if (have_attr(c,i))
+ brw_MOV(p,
+ get_vert_attr(c, dst, i),
+ get_vert_attr(c, src, i));
+ }
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+ if (!nr)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ brw_push_insn_state(p);
+
+ brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
+ brw_JMPI(p, ip, ip, c->pv);
+
+ copy_colors(c, c->vert[1], c->vert[0]);
+ copy_colors(c, c->vert[2], c->vert[0]);
+ brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));
+
+ copy_colors(c, c->vert[0], c->vert[1]);
+ copy_colors(c, c->vert[2], c->vert[1]);
+ brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));
+
+ copy_colors(c, c->vert[0], c->vert[2]);
+ copy_colors(c, c->vert[1], c->vert[2]);
+
+ brw_pop_insn_state(p);
+}
+
+
+static void do_flatshade_line( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+
+ if (!nr)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ brw_push_insn_state(p);
+
+ brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1));
+ brw_JMPI(p, ip, ip, c->pv);
+ copy_colors(c, c->vert[1], c->vert[0]);
+
+ brw_JMPI(p, ip, ip, brw_imm_ud(nr));
+ copy_colors(c, c->vert[0], c->vert[1]);
+
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Triangle setup.
+ */
+
+
+static void alloc_regs( struct brw_sf_compile *c )
+{
+ unsigned reg, i;
+
+ /* Values computed by fixed function unit:
+ */
+ c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD);
+ c->det = brw_vec1_grf(1, 2);
+ c->dx0 = brw_vec1_grf(1, 3);
+ c->dx2 = brw_vec1_grf(1, 4);
+ c->dy0 = brw_vec1_grf(1, 5);
+ c->dy2 = brw_vec1_grf(1, 6);
+
+ /* z and 1/w passed in seperately:
+ */
+ c->z[0] = brw_vec1_grf(2, 0);
+ c->inv_w[0] = brw_vec1_grf(2, 1);
+ c->z[1] = brw_vec1_grf(2, 2);
+ c->inv_w[1] = brw_vec1_grf(2, 3);
+ c->z[2] = brw_vec1_grf(2, 4);
+ c->inv_w[2] = brw_vec1_grf(2, 5);
+
+ /* The vertices:
+ */
+ reg = 3;
+ for (i = 0; i < c->nr_verts; i++) {
+ c->vert[i] = brw_vec8_grf(reg, 0);
+ reg += c->nr_attr_regs;
+ }
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ c->inv_det = brw_vec1_grf(reg, 0); reg++;
+ c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->tmp = brw_vec8_grf(reg, 0); reg++;
+
+ /* Note grf allocation:
+ */
+ c->prog_data.total_grf = reg;
+
+
+ /* Outputs of this program - interpolation coefficients for
+ * rasterization:
+ */
+ c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
+ c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
+ c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
+}
+
+
+static void copy_z_inv_w( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+ brw_push_insn_state(p);
+
+ /* Copy both scalars with a single MOV:
+ */
+ for (i = 0; i < c->nr_verts; i++)
+ brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
+
+ brw_pop_insn_state(p);
+}
+
+
+static void invert_det( struct brw_sf_compile *c)
+{
+ /* Looks like we invert all 8 elements just to get 1/det in
+ * position 2 !?!
+ */
+ brw_math(&c->func,
+ c->inv_det,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ c->det,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+
+}
+
+#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \
+ FRAG_BIT_COL0 | \
+ FRAG_BIT_COL1)
+
+static boolean calculate_masks( struct brw_sf_compile *c,
+ unsigned reg,
+ ushort *pc,
+ ushort *pc_persp,
+ ushort *pc_linear)
+{
+ boolean is_last_attr = (reg == c->nr_setup_regs - 1);
+ unsigned persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS;
+ unsigned linear_mask;
+
+ if (c->key.do_flat_shading)
+ linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
+ else
+ linear_mask = c->key.attrs;
+
+ *pc_persp = 0;
+ *pc_linear = 0;
+ *pc = 0xf;
+
+ if (persp_mask & (1 << c->idx_to_attr[reg*2]))
+ *pc_persp = 0xf;
+
+ if (linear_mask & (1 << c->idx_to_attr[reg*2]))
+ *pc_linear = 0xf;
+
+ /* Maybe only processs one attribute on the final round:
+ */
+ if (reg*2+1 < c->nr_setup_attrs) {
+ *pc |= 0xf0;
+
+ if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
+ *pc_persp |= 0xf0;
+
+ if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
+ *pc_linear |= 0xf0;
+ }
+
+ return is_last_attr;
+}
+
+
+
+void brw_emit_tri_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+ c->nr_verts = 3;
+ alloc_regs(c);
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_twoside_color)
+ do_twoside_color(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_triangle(c);
+
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ struct brw_reg a2 = offset(c->vert[2], i);
+ ushort pc, pc_persp, pc_linear;
+ boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ brw_MUL(p, a2, a2, c->inv_w[2]);
+ }
+
+
+ /* Calculate coefficients for interpolated values:
+ */
+ if (pc_linear)
+ {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+ brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
+
+ /* calculate dA/dx
+ */
+ brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
+ brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ /* calculate dA/dy
+ */
+ brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
+ brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
+ * the send instruction:
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* offset */
+ BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+ }
+ }
+}
+
+
+
+void brw_emit_line_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+
+ c->nr_verts = 2;
+ alloc_regs(c);
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_line(c);
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ ushort pc, pc_persp, pc_linear;
+ boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ }
+
+ /* Calculate coefficients for position, color:
+ */
+ if (pc_linear) {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+ c->nr_verts = 1;
+ alloc_regs(c);
+ copy_z_inv_w(c);
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
+ struct brw_reg a0 = offset(c->vert[0], i);
+ ushort pc, pc_persp, pc_linear;
+ boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ if (!tex->CoordReplace) {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+ }
+
+ if (tex->CoordReplace) {
+ /* Caculate 1.0/PointWidth */
+ brw_math(&c->func,
+ c->tmp,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ c->dx0,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->key.SpriteOrigin == GL_UPPER_LEFT) {
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+ brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
+ brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+ } else {
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+ }
+ } else {
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ if (tex->CoordReplace) {
+ if (c->key.SpriteOrigin == GL_UPPER_LEFT) {
+ brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
+ brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
+ }
+ else
+ brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+ } else {
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+ }
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+/* Points setup - several simplifications as all attributes are
+ * constant across the face of the point (point sprites excluded!)
+ */
+void brw_emit_point_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+ c->nr_verts = 1;
+ alloc_regs(c);
+ copy_z_inv_w(c);
+
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ struct brw_reg a0 = offset(c->vert[0], i);
+ ushort pc, pc_persp, pc_linear;
+ boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ /* This seems odd as the values are all constant, but the
+ * fragment shader will be expecting it:
+ */
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+
+
+ /* The delta values are always zero, just send the starting
+ * coordinate. Again, this is to fit in with the interpolation
+ * code in the fragment shader.
+ */
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+void brw_emit_anyprim_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
+ struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
+ struct brw_reg primmask;
+ struct brw_instruction *jmp;
+ struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+
+ alloc_regs(c);
+
+ primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
+
+ brw_MOV(p, primmask, brw_imm_ud(1));
+ brw_SHL(p, primmask, primmask, payload_prim);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
+ (1<<_3DPRIM_TRISTRIP) |
+ (1<<_3DPRIM_TRIFAN) |
+ (1<<_3DPRIM_TRISTRIP_REVERSE) |
+ (1<<_3DPRIM_POLYGON) |
+ (1<<_3DPRIM_RECTLIST) |
+ (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ {
+ brw_push_insn_state(p);
+ brw_emit_tri_setup( c );
+ brw_pop_insn_state(p);
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
+ (1<<_3DPRIM_LINESTRIP) |
+ (1<<_3DPRIM_LINELOOP) |
+ (1<<_3DPRIM_LINESTRIP_CONT) |
+ (1<<_3DPRIM_LINESTRIP_BF) |
+ (1<<_3DPRIM_LINESTRIP_CONT_BF)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ {
+ brw_push_insn_state(p);
+ brw_emit_line_setup( c );
+ brw_pop_insn_state(p);
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ {
+ brw_push_insn_state(p);
+ brw_emit_point_sprite_setup( c );
+ brw_pop_insn_state(p);
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ brw_emit_point_setup( c );
+}
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_sf_state.c b/src/mesa/pipe/i965simple/brw_sf_state.c
new file mode 100644
index 0000000000..7b6ee215eb
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_sf_state.c
@@ -0,0 +1,236 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#if 0
+static void upload_sf_vp(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_sf_viewport sfv;
+ struct intel_renderbuffer *irb =
+ intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0][0]);
+ float y_scale, y_bias;
+ int x, y, w, h, x1, x2, y1, y2;
+ int draw_h = ctx->DrawBuffer->Height;
+
+ memset(&sfv, 0, sizeof(sfv));
+
+ if (ctx->DrawBuffer->Name) {
+ /* User-created FBO */
+ if (irb && !irb->RenderToTexture) {
+ y_scale = -1.0;
+ y_bias = draw_h;
+ } else {
+ y_scale = 1.0;
+ y_bias = 0;
+ }
+ } else {
+ if (brw->intel.driDrawable != NULL) {
+ y_scale = -1.0;
+ y_bias = draw_h;
+ } else {
+ y_scale = 1.0;
+ y_bias = 0;
+ }
+ }
+
+ /* _NEW_VIEWPORT, BRW_NEW_METAOPS */
+
+ if (!brw->metaops.active) {
+ const float *v = brw->intel.ctx.Viewport._WindowMap.m;
+
+ sfv.viewport.m00 = v[MAT_SX];
+ sfv.viewport.m11 = v[MAT_SY] * y_scale;
+ sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale;
+ sfv.viewport.m30 = v[MAT_TX];
+ sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+ sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale;
+ } else {
+ sfv.viewport.m00 = 1;
+ sfv.viewport.m11 = - 1;
+ sfv.viewport.m22 = 1;
+ sfv.viewport.m30 = 0;
+ sfv.viewport.m31 = brw->intel.driDrawable->h;
+ sfv.viewport.m32 = 0;
+ }
+
+ /* _NEW_SCISSOR */
+ x = brw->attribs.Scissor->X;
+ y = brw->attribs.Scissor->Y;
+ w = brw->attribs.Scissor->Width;
+ h = brw->attribs.Scissor->Height;
+
+ if (ctx->DrawBuffer->Name == 0) {
+ x1 = x;
+ y1 = draw_h - (y + h);
+ x2 = x + w - 1;
+ y2 = y1 + h - 1;
+ } else {
+ /* FBO has non-inverted coords. */
+ x1 = x;
+ y1 = y;
+ x2 = x + w - 1;
+ y2 = y + h - 1;
+ }
+
+ sfv.scissor.xmin = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1);
+ sfv.scissor.xmax = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1);
+ sfv.scissor.ymin = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1);
+ sfv.scissor.ymax = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1);
+
+ brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv );
+}
+
+const struct brw_tracked_state brw_sf_vp = {
+ .dirty = {
+ .mesa = (_NEW_VIEWPORT |
+ _NEW_SCISSOR),
+ .brw = BRW_NEW_METAOPS,
+ .cache = 0
+ },
+ .update = upload_sf_vp
+};
+
+static void upload_sf_unit( struct brw_context *brw )
+{
+ struct brw_sf_unit_state sf;
+ memset(&sf, 0, sizeof(sf));
+
+ /* CACHE_NEW_SF_PROG */
+ sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
+ sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6;
+ sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+ sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ sf.thread3.dispatch_grf_start_reg = 3;
+ sf.thread3.urb_entry_read_offset = 1;
+
+ /* BRW_NEW_URB_FENCE */
+ sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries;
+ sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1;
+ sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1;
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ sf.thread4.max_threads = 0;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ sf.thread4.stats_enable = 1;
+
+ /* CACHE_NEW_SF_VP */
+ sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5;
+
+ sf.sf5.viewport_transform = 1;
+
+ /* _NEW_SCISSOR */
+ if (brw->attribs.Scissor->Enabled)
+ sf.sf6.scissor = 1;
+
+ /* _NEW_POLYGON */
+ if (brw->attribs.Polygon->FrontFace == GL_CCW)
+ sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+ else
+ sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+ if (brw->attribs.Polygon->CullFlag) {
+ switch (brw->attribs.Polygon->CullFaceMode) {
+ case GL_FRONT:
+ sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+ break;
+ case GL_BACK:
+ sf.sf6.cull_mode = BRW_CULLMODE_BACK;
+ break;
+ case GL_FRONT_AND_BACK:
+ sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ else
+ sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+
+
+ /* _NEW_LINE */
+ /* XXX use ctx->Const.Min/MaxLineWidth here */
+ sf.sf6.line_width = CLAMP(brw->attribs.Line->Width, 1.0, 5.0) * (1<<1);
+
+ sf.sf6.line_endcap_aa_region_width = 1;
+ if (brw->attribs.Line->SmoothFlag)
+ sf.sf6.aa_enable = 1;
+ else if (sf.sf6.line_width <= 0x2)
+ sf.sf6.line_width = 0;
+
+ /* _NEW_POINT */
+ sf.sf6.point_rast_rule = 1; /* opengl conventions */
+ /* XXX clamp max depends on AA vs. non-AA */
+
+ sf.sf7.sprite_point = brw->attribs.Point->PointSprite;
+ sf.sf7.point_size = CLAMP(brw->attribs.Point->Size, 1.0, 255.0) * (1<<3);
+ sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated;
+
+ /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+ */
+ sf.sf7.trifan_pv = 2;
+ sf.sf7.linestrip_pv = 1;
+ sf.sf7.tristrip_pv = 2;
+ sf.sf7.line_last_pixel_enable = 0;
+
+ /* Set bias for OpenGL rasterization rules:
+ */
+ sf.sf6.dest_org_vbias = 0x8;
+ sf.sf6.dest_org_hbias = 0x8;
+
+ brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf );
+}
+
+
+const struct brw_tracked_state brw_sf_unit = {
+ .dirty = {
+ .mesa = (_NEW_POLYGON |
+ _NEW_LINE |
+ _NEW_POINT |
+ _NEW_SCISSOR),
+ .brw = (BRW_NEW_URB_FENCE |
+ BRW_NEW_METAOPS),
+ .cache = (CACHE_NEW_SF_VP |
+ CACHE_NEW_SF_PROG)
+ },
+ .update = upload_sf_unit
+};
+
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_state.c b/src/mesa/pipe/i965simple/brw_state.c
new file mode 100644
index 0000000000..ff4ae7999b
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_state.c
@@ -0,0 +1,451 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Zack Rusin <zack@tungstengraphics.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_winsys.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_dump.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_draw.h"
+
+
+#define DUP( TYPE, VAL ) \
+do { \
+ struct TYPE *x = malloc(sizeof(*x)); \
+ memcpy(x, VAL, sizeof(*x) ); \
+ return x; \
+} while (0)
+
+/************************************************************************
+ * Blend
+ */
+static void *
+brw_create_blend_state(struct pipe_context *pipe,
+ const struct pipe_blend_state *blend)
+{
+ DUP( pipe_blend_state, blend );
+}
+
+static void brw_bind_blend_state(struct pipe_context *pipe,
+ void *blend)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Blend = (struct pipe_blend_state*)blend;
+ brw->state.dirty.brw |= BRW_NEW_BLEND;
+}
+
+
+static void brw_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+ free(blend);
+}
+
+static void brw_set_blend_color( struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.BlendColor = *blend_color;
+
+ brw->state.dirty.brw |= BRW_NEW_BLEND;
+}
+
+/************************************************************************
+ * Sampler
+ */
+
+static void *
+brw_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *sampler)
+{
+ DUP( pipe_sampler_state, sampler );
+}
+
+static void brw_bind_sampler_state(struct pipe_context *pipe,
+ unsigned unit, void *sampler)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Samplers[unit] = sampler;
+ brw->state.dirty.brw |= BRW_NEW_SAMPLER;
+}
+
+static void brw_delete_sampler_state(struct pipe_context *pipe,
+ void *sampler)
+{
+ free(sampler);
+}
+
+
+/************************************************************************
+ * Depth stencil
+ */
+
+static void *
+brw_create_depth_stencil_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_state *depth_stencil)
+{
+ DUP( pipe_depth_stencil_state, depth_stencil );
+}
+
+static void brw_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.DepthStencil = (const struct pipe_depth_stencil_state *)depth_stencil;
+
+ brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL;
+}
+
+static void brw_delete_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ free(depth_stencil);
+}
+
+/************************************************************************
+ * Alpha test
+ */
+static void *
+brw_create_alpha_test_state(struct pipe_context *pipe,
+ const struct pipe_alpha_test_state *alpha_test)
+{
+ DUP(pipe_alpha_test_state, alpha_test);
+}
+
+static void brw_bind_alpha_test_state(struct pipe_context *pipe,
+ void *alpha)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.AlphaTest = (const struct pipe_alpha_test_state*)alpha;
+
+ brw->state.dirty.brw |= BRW_NEW_ALPHA_TEST;
+}
+
+static void brw_delete_alpha_test_state(struct pipe_context *pipe,
+ void *alpha)
+{
+ free(alpha);
+}
+
+/************************************************************************
+ * Scissor
+ */
+static void brw_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) );
+ brw->state.dirty.brw |= BRW_NEW_SCISSOR;
+}
+
+
+/************************************************************************
+ * Stipple
+ */
+
+static void brw_set_polygon_stipple( struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple )
+{
+}
+
+
+/************************************************************************
+ * Fragment shader
+ */
+
+static void * brw_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *shader)
+{
+ struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program);
+
+ /* XXX: Do I have to duplicate the tokens as well??
+ */
+ brw_fp->program = *shader;
+
+ return (void *)brw_fp;
+}
+
+static void brw_bind_fs_state(struct pipe_context *pipe, void *shader)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader;
+ brw->state.dirty.brw |= BRW_NEW_FS;
+}
+
+static void brw_delete_fs_state(struct pipe_context *pipe, void *shader)
+{
+ FREE(shader);
+}
+
+
+/************************************************************************
+ * Vertex shader and other TNL state
+ */
+
+static void *brw_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *shader)
+{
+ struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program);
+
+ /* XXX: Do I have to duplicate the tokens as well??
+ */
+ brw_vp->program = *shader;
+
+ tgsi_dump(shader->tokens, 0);
+
+ return (void *)brw_vp;
+}
+
+static void brw_bind_vs_state(struct pipe_context *pipe, void *vs)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.VertexProgram = (struct brw_vertex_program *)vs;
+ brw->state.dirty.brw |= BRW_NEW_VS;
+
+ printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n");
+}
+
+static void brw_delete_vs_state(struct pipe_context *pipe, void *shader)
+{
+ FREE(shader);
+}
+
+
+static void brw_set_clip_state( struct pipe_context *pipe,
+ const struct pipe_clip_state *clip )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Clip = *clip;
+}
+
+
+static void brw_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Viewport = *viewport; /* struct copy */
+ brw->state.dirty.brw |= BRW_NEW_VIEWPORT;
+
+ /* pass the viewport info to the draw module */
+ //draw_set_viewport_state(brw->draw, viewport);
+}
+
+
+static void brw_set_vertex_buffer( struct pipe_context *pipe,
+ unsigned index,
+ const struct pipe_vertex_buffer *buffer )
+{
+ struct brw_context *brw = brw_context(pipe);
+ brw->vb.vbo_array[index] = *buffer;
+ if (index > brw->vb.last_vb)
+ brw->vb.last_vb = index;
+ assert(brw->vb.last_vb < BRW_VEP_MAX);
+}
+
+static void brw_set_vertex_element(struct pipe_context *pipe,
+ unsigned index,
+ const struct pipe_vertex_element *element)
+{
+ /* flush ? */
+ struct brw_context *brw = brw_context(pipe);
+
+ assert(index < PIPE_ATTRIB_MAX);
+ struct brw_vertex_element el;
+ memset(&el, 0, sizeof(struct brw_vertex_element));
+
+ /* do we need those anymore?*/
+ el.index = index;
+#if 0
+ /*FIXME*/
+ el.element_size = 0;
+ el.count = 0;
+ el.vbo_rebase_offset = 0;
+#endif
+
+ el.vep.ve0.src_offset = element->src_offset;
+ el.vep.ve0.src_format = brw_translate_surface_format(element->src_format);
+ el.vep.ve0.valid = 1;
+ el.vep.ve0.vertex_buffer_index = element->vertex_buffer_index;
+
+ el.vep.ve1.dst_offset = element->dst_offset;
+ el.vep.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
+ el.vep.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
+ el.vep.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
+ el.vep.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;
+ /*can we count of brw->vb.vbo_array[element->vertex_buffer_index]
+ * being initialized ok to actually compute vbcomponent's
+ * correctly? */
+
+ brw->vb.inputs[index] = el;
+}
+
+
+
+/************************************************************************
+ * Constant buffers
+ */
+
+static void brw_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ assert(buf == 0 || index == 0);
+
+ brw->attribs.Constants[shader] = buf;
+ brw->state.dirty.brw |= BRW_NEW_CONSTANTS;
+}
+
+
+/************************************************************************
+ * Texture surfaces
+ */
+
+
+static void brw_set_sampler_texture(struct pipe_context *pipe,
+ unsigned unit,
+ struct pipe_texture *texture)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Texture[unit] = (struct brw_texture*)texture; /* ptr, not struct */
+
+ brw->state.dirty.brw |= BRW_NEW_TEXTURE;
+}
+
+
+/************************************************************************
+ * Render targets, etc
+ */
+
+static void brw_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.FrameBuffer = *fb; /* struct copy */
+
+ brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER;
+}
+
+
+
+/************************************************************************
+ * Rasterizer state
+ */
+
+static void *
+brw_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *rasterizer)
+{
+ DUP(pipe_rasterizer_state, rasterizer);
+}
+
+static void brw_bind_rasterizer_state( struct pipe_context *pipe,
+ void *setup )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Raster = (struct pipe_rasterizer_state *)setup;
+
+ /* Also pass-through to draw module:
+ */
+ //draw_set_rasterizer_state(brw->draw, setup);
+
+ brw->state.dirty.brw |= BRW_NEW_RASTERIZER;
+}
+
+static void brw_delete_rasterizer_state(struct pipe_context *pipe,
+ void *setup)
+{
+ free(setup);
+}
+
+
+
+void
+brw_init_state_functions( struct brw_context *brw )
+{
+ brw->pipe.create_alpha_test_state = brw_create_alpha_test_state;
+ brw->pipe.bind_alpha_test_state = brw_bind_alpha_test_state;
+ brw->pipe.delete_alpha_test_state = brw_delete_alpha_test_state;
+
+ brw->pipe.create_blend_state = brw_create_blend_state;
+ brw->pipe.bind_blend_state = brw_bind_blend_state;
+ brw->pipe.delete_blend_state = brw_delete_blend_state;
+
+ brw->pipe.create_sampler_state = brw_create_sampler_state;
+ brw->pipe.bind_sampler_state = brw_bind_sampler_state;
+ brw->pipe.delete_sampler_state = brw_delete_sampler_state;
+
+ brw->pipe.create_depth_stencil_state = brw_create_depth_stencil_state;
+ brw->pipe.bind_depth_stencil_state = brw_bind_depth_stencil_state;
+ brw->pipe.delete_depth_stencil_state = brw_delete_depth_stencil_state;
+
+ brw->pipe.create_rasterizer_state = brw_create_rasterizer_state;
+ brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state;
+ brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state;
+ brw->pipe.create_fs_state = brw_create_fs_state;
+ brw->pipe.bind_fs_state = brw_bind_fs_state;
+ brw->pipe.delete_fs_state = brw_delete_fs_state;
+ brw->pipe.create_vs_state = brw_create_vs_state;
+ brw->pipe.bind_vs_state = brw_bind_vs_state;
+ brw->pipe.delete_vs_state = brw_delete_vs_state;
+
+ brw->pipe.set_blend_color = brw_set_blend_color;
+ brw->pipe.set_clip_state = brw_set_clip_state;
+ brw->pipe.set_constant_buffer = brw_set_constant_buffer;
+ brw->pipe.set_framebuffer_state = brw_set_framebuffer_state;
+
+// brw->pipe.set_feedback_state = brw_set_feedback_state;
+// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer;
+
+ brw->pipe.set_polygon_stipple = brw_set_polygon_stipple;
+ brw->pipe.set_scissor_state = brw_set_scissor_state;
+ brw->pipe.set_sampler_texture = brw_set_sampler_texture;
+ brw->pipe.set_viewport_state = brw_set_viewport_state;
+ brw->pipe.set_vertex_buffer = brw_set_vertex_buffer;
+ brw->pipe.set_vertex_element = brw_set_vertex_element;
+}
diff --git a/src/mesa/pipe/i965simple/brw_state.h b/src/mesa/pipe/i965simple/brw_state.h
new file mode 100644
index 0000000000..4dabfe8082
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_state.h
@@ -0,0 +1,157 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "brw_context.h"
+#include "brw_winsys.h"
+
+
+const struct brw_tracked_state brw_blend_constant_color;
+const struct brw_tracked_state brw_cc_unit;
+const struct brw_tracked_state brw_cc_vp;
+const struct brw_tracked_state brw_clip_prog;
+const struct brw_tracked_state brw_clip_unit;
+const struct brw_tracked_state brw_constant_buffer_state;
+const struct brw_tracked_state brw_constant_buffer;
+const struct brw_tracked_state brw_curbe_offsets;
+const struct brw_tracked_state brw_invarient_state;
+const struct brw_tracked_state brw_gs_prog;
+const struct brw_tracked_state brw_gs_unit;
+const struct brw_tracked_state brw_drawing_rect;
+const struct brw_tracked_state brw_line_stipple;
+const struct brw_tracked_state brw_pipelined_state_pointers;
+const struct brw_tracked_state brw_binding_table_pointers;
+const struct brw_tracked_state brw_depthbuffer;
+const struct brw_tracked_state brw_polygon_stipple_offset;
+const struct brw_tracked_state brw_polygon_stipple;
+const struct brw_tracked_state brw_program_parameters;
+const struct brw_tracked_state brw_recalculate_urb_fence;
+const struct brw_tracked_state brw_sf_prog;
+const struct brw_tracked_state brw_sf_unit;
+const struct brw_tracked_state brw_sf_vp;
+const struct brw_tracked_state brw_state_base_address;
+const struct brw_tracked_state brw_urb_fence;
+const struct brw_tracked_state brw_vertex_state;
+const struct brw_tracked_state brw_vs_prog;
+const struct brw_tracked_state brw_vs_unit;
+const struct brw_tracked_state brw_wm_input_sizes;
+const struct brw_tracked_state brw_wm_prog;
+const struct brw_tracked_state brw_wm_samplers;
+const struct brw_tracked_state brw_wm_surfaces;
+const struct brw_tracked_state brw_wm_unit;
+
+const struct brw_tracked_state brw_psp_urb_cbs;
+
+const struct brw_tracked_state brw_active_vertprog;
+const struct brw_tracked_state brw_tnl_vertprog;
+const struct brw_tracked_state brw_pipe_control;
+
+const struct brw_tracked_state brw_clear_surface_cache;
+const struct brw_tracked_state brw_clear_batch_cache;
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+unsigned brw_cache_data(struct brw_cache *cache,
+ const void *data );
+
+unsigned brw_cache_data_sz(struct brw_cache *cache,
+ const void *data,
+ unsigned data_sz);
+
+unsigned brw_upload_cache( struct brw_cache *cache,
+ const void *key,
+ unsigned key_sz,
+ const void *data,
+ unsigned data_sz,
+ const void *aux,
+ void *aux_return );
+
+boolean brw_search_cache( struct brw_cache *cache,
+ const void *key,
+ unsigned key_size,
+ void *aux_return,
+ unsigned *offset_return);
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+
+static inline struct pipe_buffer_handle *brw_cache_buffer(struct brw_context *brw,
+ enum brw_cache_id id)
+{
+ return brw->cache[id].pool->buffer;
+}
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->winsys, (s), sizeof(*(s)))
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
+
+boolean brw_cached_batch_struct( struct brw_context *brw,
+ const void *data,
+ unsigned sz );
+
+void brw_destroy_batch_cache( struct brw_context *brw );
+
+
+/***********************************************************************
+ * brw_state_pool.c
+ */
+void brw_init_pools( struct brw_context *brw );
+void brw_destroy_pools( struct brw_context *brw );
+
+boolean brw_pool_alloc( struct brw_mem_pool *pool,
+ unsigned size,
+ unsigned alignment,
+ unsigned *offset_return);
+
+void brw_pool_fence( struct brw_context *brw,
+ struct brw_mem_pool *pool,
+ unsigned fence );
+
+
+void brw_pool_check_wrap( struct brw_context *brw,
+ struct brw_mem_pool *pool );
+
+void brw_clear_all_caches( struct brw_context *brw );
+void brw_invalidate_pools( struct brw_context *brw );
+void brw_clear_batch_cache_flush( struct brw_context *brw );
+
+void brw_upload_cc_unit(struct brw_context *brw);
+void brw_upload_clip_prog(struct brw_context *brw);
+void brw_upload_blend_constant_color(struct brw_context *brw);
+void brw_upload_wm_samplers(struct brw_context *brw);
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_state_batch.c b/src/mesa/pipe/i965simple/brw_state_batch.c
new file mode 100644
index 0000000000..35db76b594
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_state_batch.c
@@ -0,0 +1,113 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_state.h"
+#include "brw_winsys.h"
+
+#include "pipe/p_util.h"
+
+/* A facility similar to the data caching code above, which aims to
+ * prevent identical commands being issued repeatedly.
+ */
+boolean brw_cached_batch_struct( struct brw_context *brw,
+ const void *data,
+ unsigned sz )
+{
+ struct brw_cached_batch_item *item = brw->cached_batch_items;
+ struct header *newheader = (struct header *)data;
+
+ if (brw->emit_state_always) {
+ brw_batchbuffer_data(brw->winsys, data, sz);
+ return TRUE;
+ }
+
+ while (item) {
+ if (item->header->opcode == newheader->opcode) {
+ if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+ return FALSE;
+ if (item->sz != sz) {
+ FREE(item->header);
+ item->header = MALLOC(sz);
+ item->sz = sz;
+ }
+ goto emit;
+ }
+ item = item->next;
+ }
+
+ assert(!item);
+ item = CALLOC_STRUCT(brw_cached_batch_item);
+ item->header = MALLOC(sz);
+ item->sz = sz;
+ item->next = brw->cached_batch_items;
+ brw->cached_batch_items = item;
+
+emit:
+ memcpy(item->header, newheader, sz);
+ brw_batchbuffer_data(brw->winsys, data, sz);
+ return TRUE;
+}
+
+static void clear_batch_cache( struct brw_context *brw )
+{
+ struct brw_cached_batch_item *item = brw->cached_batch_items;
+
+ while (item) {
+ struct brw_cached_batch_item *next = item->next;
+ free((void *)item->header);
+ free(item);
+ item = next;
+ }
+
+ brw->cached_batch_items = NULL;
+
+
+ brw_clear_all_caches(brw);
+
+ brw_invalidate_pools(brw);
+}
+
+void brw_clear_batch_cache_flush( struct brw_context *brw )
+{
+ clear_batch_cache(brw);
+
+/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
+
+ brw->state.dirty.brw |= ~0;
+ brw->state.dirty.cache |= ~0;
+}
+
+
+
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+ clear_batch_cache(brw);
+}
diff --git a/src/mesa/pipe/i965simple/brw_state_cache.c b/src/mesa/pipe/i965simple/brw_state_cache.c
new file mode 100644
index 0000000000..13e262d2e5
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_state_cache.c
@@ -0,0 +1,442 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_state.h"
+
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_clip.h"
+#include "brw_sf.h"
+#include "brw_gs.h"
+
+#include "pipe/p_util.h"
+
+
+
+/***********************************************************************
+ * Check cache for uploaded version of struct, else upload new one.
+ * Fail when memory is exhausted.
+ *
+ * XXX: FIXME: Currently search is so slow it would be quicker to
+ * regenerate the data every time...
+ */
+
+static unsigned hash_key( const void *key, unsigned key_size )
+{
+ unsigned *ikey = (unsigned *)key;
+ unsigned hash = 0, i;
+
+ assert(key_size % 4 == 0);
+
+ /* I'm sure this can be improved on:
+ */
+ for (i = 0; i < key_size/4; i++)
+ hash ^= ikey[i];
+
+ return hash;
+}
+
+static struct brw_cache_item *search_cache( struct brw_cache *cache,
+ unsigned hash,
+ const void *key,
+ unsigned key_size)
+{
+ struct brw_cache_item *c;
+
+ for (c = cache->items[hash % cache->size]; c; c = c->next) {
+ if (c->hash == hash &&
+ c->key_size == key_size &&
+ memcmp(c->key, key, key_size) == 0)
+ return c;
+ }
+
+ return NULL;
+}
+
+
+static void rehash( struct brw_cache *cache )
+{
+ struct brw_cache_item **items;
+ struct brw_cache_item *c, *next;
+ unsigned size, i;
+
+ size = cache->size * 3;
+ items = (struct brw_cache_item**) MALLOC(size * sizeof(*items));
+ memset(items, 0, size * sizeof(*items));
+
+ for (i = 0; i < cache->size; i++)
+ for (c = cache->items[i]; c; c = next) {
+ next = c->next;
+ c->next = items[c->hash % size];
+ items[c->hash % size] = c;
+ }
+
+ FREE(cache->items);
+ cache->items = items;
+ cache->size = size;
+}
+
+
+boolean brw_search_cache( struct brw_cache *cache,
+ const void *key,
+ unsigned key_size,
+ void *aux_return,
+ unsigned *offset_return)
+{
+ struct brw_cache_item *item;
+ unsigned addr = 0;
+ unsigned hash = hash_key(key, key_size);
+
+ item = search_cache(cache, hash, key, key_size);
+
+ if (item) {
+ if (aux_return)
+ *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+ *offset_return = addr = item->offset;
+ }
+
+ if (item == NULL || addr != cache->last_addr) {
+ cache->brw->state.dirty.cache |= 1<<cache->id;
+ cache->last_addr = addr;
+ }
+
+ return item != NULL;
+}
+
+unsigned brw_upload_cache( struct brw_cache *cache,
+ const void *key,
+ unsigned key_size,
+ const void *data,
+ unsigned data_size,
+ const void *aux,
+ void *aux_return )
+{
+ unsigned offset;
+ struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+ unsigned hash = hash_key(key, key_size);
+ void *tmp = MALLOC(key_size + cache->aux_size);
+
+ if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) {
+ /* Should not be possible:
+ */
+ printf("brw_pool_alloc failed\n");
+ exit(1);
+ }
+
+ memcpy(tmp, key, key_size);
+
+ if (cache->aux_size)
+ memcpy(tmp+key_size, aux, cache->aux_size);
+
+ item->key = tmp;
+ item->hash = hash;
+ item->key_size = key_size;
+ item->offset = offset;
+ item->data_size = data_size;
+
+ if (++cache->n_items > cache->size * 1.5)
+ rehash(cache);
+
+ hash %= cache->size;
+ item->next = cache->items[hash];
+ cache->items[hash] = item;
+
+ if (aux_return) {
+ assert(cache->aux_size);
+ *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+ }
+
+ if (BRW_DEBUG & DEBUG_STATE)
+ printf("upload %s: %d bytes to pool buffer %p offset %x\n",
+ cache->name, data_size,
+ cache->pool->buffer,
+ offset);
+
+ /* Copy data to the buffer:
+ */
+ cache->brw->winsys->buffer_subdata_typed(cache->brw->winsys,
+ cache->pool->buffer,
+ offset,
+ data_size,
+ data,
+ cache->id);
+
+ cache->brw->state.dirty.cache |= 1<<cache->id;
+ cache->last_addr = offset;
+
+ return offset;
+}
+
+/* This doesn't really work with aux data. Use search/upload instead
+ */
+unsigned brw_cache_data_sz(struct brw_cache *cache,
+ const void *data,
+ unsigned data_size)
+{
+ unsigned addr;
+
+ if (!brw_search_cache(cache, data, data_size, NULL, &addr)) {
+ addr = brw_upload_cache(cache,
+ data, data_size,
+ data, data_size,
+ NULL, NULL);
+ }
+
+ return addr;
+}
+
+unsigned brw_cache_data(struct brw_cache *cache,
+ const void *data)
+{
+ return brw_cache_data_sz(cache, data, cache->key_size);
+}
+
+enum pool_type {
+ DW_SURFACE_STATE,
+ DW_GENERAL_STATE
+};
+
+static void brw_init_cache( struct brw_context *brw,
+ const char *name,
+ unsigned id,
+ unsigned key_size,
+ unsigned aux_size,
+ enum pool_type pool_type)
+{
+ struct brw_cache *cache = &brw->cache[id];
+ cache->brw = brw;
+ cache->id = id;
+ cache->name = name;
+ cache->items = NULL;
+
+ cache->size = 7;
+ cache->n_items = 0;
+ cache->items = (struct brw_cache_item **)
+ CALLOC(cache->size, sizeof(struct brw_cache_item));
+
+
+ cache->key_size = key_size;
+ cache->aux_size = aux_size;
+ switch (pool_type) {
+ case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break;
+ case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break;
+ default: assert(0); break;
+ }
+}
+
+void brw_init_caches( struct brw_context *brw )
+{
+
+ brw_init_cache(brw,
+ "CC_VP",
+ BRW_CC_VP,
+ sizeof(struct brw_cc_viewport),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "CC_UNIT",
+ BRW_CC_UNIT,
+ sizeof(struct brw_cc_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "WM_PROG",
+ BRW_WM_PROG,
+ sizeof(struct brw_wm_prog_key),
+ sizeof(struct brw_wm_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SAMPLER_DEFAULT_COLOR",
+ BRW_SAMPLER_DEFAULT_COLOR,
+ sizeof(struct brw_sampler_default_color),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SAMPLER",
+ BRW_SAMPLER,
+ 0, /* variable key/data size */
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "WM_UNIT",
+ BRW_WM_UNIT,
+ sizeof(struct brw_wm_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SF_PROG",
+ BRW_SF_PROG,
+ sizeof(struct brw_sf_prog_key),
+ sizeof(struct brw_sf_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SF_VP",
+ BRW_SF_VP,
+ sizeof(struct brw_sf_viewport),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SF_UNIT",
+ BRW_SF_UNIT,
+ sizeof(struct brw_sf_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "VS_UNIT",
+ BRW_VS_UNIT,
+ sizeof(struct brw_vs_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "VS_PROG",
+ BRW_VS_PROG,
+ sizeof(struct brw_vs_prog_key),
+ sizeof(struct brw_vs_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "CLIP_UNIT",
+ BRW_CLIP_UNIT,
+ sizeof(struct brw_clip_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "CLIP_PROG",
+ BRW_CLIP_PROG,
+ sizeof(struct brw_clip_prog_key),
+ sizeof(struct brw_clip_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "GS_UNIT",
+ BRW_GS_UNIT,
+ sizeof(struct brw_gs_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "GS_PROG",
+ BRW_GS_PROG,
+ sizeof(struct brw_gs_prog_key),
+ sizeof(struct brw_gs_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SS_SURFACE",
+ BRW_SS_SURFACE,
+ sizeof(struct brw_surface_state),
+ 0,
+ DW_SURFACE_STATE);
+
+ brw_init_cache(brw,
+ "SS_SURF_BIND",
+ BRW_SS_SURF_BIND,
+ sizeof(struct brw_surface_binding_table),
+ 0,
+ DW_SURFACE_STATE);
+}
+
+
+/* When we lose hardware context, need to invalidate the surface cache
+ * as these structs must be explicitly re-uploaded. They are subject
+ * to fixup by the memory manager as they contain absolute agp
+ * offsets, so we need to ensure there is a fresh version of the
+ * struct available to receive the fixup.
+ *
+ * XXX: Need to ensure that there aren't two versions of a surface or
+ * bufferobj with different backing data active in the same buffer at
+ * once? Otherwise the cache could confuse them. Maybe better not to
+ * cache at all?
+ *
+ * --> Isn't this the same as saying need to ensure batch is flushed
+ * before new data is uploaded to an existing buffer? We
+ * already try to make sure of that.
+ */
+static void clear_cache( struct brw_cache *cache )
+{
+ struct brw_cache_item *c, *next;
+ unsigned i;
+
+ for (i = 0; i < cache->size; i++) {
+ for (c = cache->items[i]; c; c = next) {
+ next = c->next;
+ free((void *)c->key);
+ free(c);
+ }
+ cache->items[i] = NULL;
+ }
+
+ cache->n_items = 0;
+}
+
+void brw_clear_all_caches( struct brw_context *brw )
+{
+ int i;
+
+ if (BRW_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ for (i = 0; i < BRW_MAX_CACHE; i++)
+ clear_cache(&brw->cache[i]);
+
+ if (brw->curbe.last_buf) {
+ FREE(brw->curbe.last_buf);
+ brw->curbe.last_buf = NULL;
+ }
+
+ brw->state.dirty.brw |= ~0;
+ brw->state.dirty.cache |= ~0;
+}
+
+
+
+
+
+void brw_destroy_caches( struct brw_context *brw )
+{
+ unsigned i;
+
+ for (i = 0; i < BRW_MAX_CACHE; i++)
+ clear_cache(&brw->cache[i]);
+}
diff --git a/src/mesa/pipe/i965simple/brw_state_pool.c b/src/mesa/pipe/i965simple/brw_state_pool.c
new file mode 100644
index 0000000000..a490049024
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_state_pool.c
@@ -0,0 +1,139 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/** @file brw_state_pool.c
+ * Implements the state pool allocator.
+ *
+ * For the 965, we create two state pools for state cache entries. Objects
+ * will be allocated into the pools depending on which state base address
+ * their pointer is relative to in other 965 state.
+ *
+ * The state pools are relatively simple: As objects are allocated, increment
+ * the offset to allocate space. When the pool is "full" (rather, close to
+ * full), we reset the pool and reset the state cache entries that point into
+ * the pool.
+ */
+
+#include "pipe/p_winsys.h"
+#include "brw_context.h"
+#include "brw_state.h"
+
+boolean brw_pool_alloc( struct brw_mem_pool *pool,
+ unsigned size,
+ unsigned align,
+ unsigned *offset_return)
+{
+ unsigned fixup = ALIGN(pool->offset, align) - pool->offset;
+
+ size = ALIGN(size, 4);
+
+ if (pool->offset + fixup + size >= pool->size) {
+ printf("%s failed\n", __FUNCTION__);
+ assert(0);
+ exit(0);
+ }
+
+ pool->offset += fixup;
+ *offset_return = pool->offset;
+ pool->offset += size;
+
+ return TRUE;
+}
+
+static
+void brw_invalidate_pool( struct brw_mem_pool *pool )
+{
+ if (BRW_DEBUG & DEBUG_STATE)
+ printf("\n\n\n %s \n\n\n", __FUNCTION__);
+
+ pool->offset = 0;
+
+ brw_clear_all_caches(pool->brw);
+}
+
+
+static void brw_init_pool( struct brw_context *brw,
+ unsigned pool_id,
+ unsigned size )
+{
+ struct brw_mem_pool *pool = &brw->pool[pool_id];
+
+ pool->size = size;
+ pool->brw = brw;
+
+ pool->buffer = brw->pipe.winsys->buffer_create(brw->pipe.winsys,
+ 4096, 0, 0);
+
+ brw->pipe.winsys->buffer_data(brw->pipe.winsys,
+ pool->buffer,
+ size,
+ NULL,
+ 0 /* DRM_BO_FLAG_MEM_TT */);
+}
+
+static void brw_destroy_pool( struct brw_context *brw,
+ unsigned pool_id )
+{
+ struct brw_mem_pool *pool = &brw->pool[pool_id];
+
+ pool->brw->pipe.winsys->buffer_reference( pool->brw->pipe.winsys,
+ &pool->buffer,
+ NULL );
+}
+
+
+void brw_pool_check_wrap( struct brw_context *brw,
+ struct brw_mem_pool *pool )
+{
+ if (pool->offset > (pool->size * 3) / 4) {
+ brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+ }
+
+}
+
+void brw_init_pools( struct brw_context *brw )
+{
+ brw_init_pool(brw, BRW_GS_POOL, 0x80000);
+ brw_init_pool(brw, BRW_SS_POOL, 0x80000);
+}
+
+void brw_destroy_pools( struct brw_context *brw )
+{
+ brw_destroy_pool(brw, BRW_GS_POOL);
+ brw_destroy_pool(brw, BRW_SS_POOL);
+}
+
+
+void brw_invalidate_pools( struct brw_context *brw )
+{
+ brw_invalidate_pool(&brw->pool[BRW_GS_POOL]);
+ brw_invalidate_pool(&brw->pool[BRW_SS_POOL]);
+}
diff --git a/src/mesa/pipe/i965simple/brw_state_upload.c b/src/mesa/pipe/i965simple/brw_state_upload.c
new file mode 100644
index 0000000000..1ca7484958
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_state_upload.c
@@ -0,0 +1,231 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+#include "pipe/p_util.h"
+
+/* This is used to initialize brw->state.atoms[]. We could use this
+ * list directly except for a single atom, brw_constant_buffer, which
+ * has a .dirty value which changes according to the parameters of the
+ * current fragment and vertex programs, and so cannot be a static
+ * value.
+ */
+const struct brw_tracked_state *atoms[] =
+{
+ &brw_wm_input_sizes,
+ &brw_vs_prog,
+ &brw_gs_prog,
+ &brw_clip_prog,
+ &brw_sf_prog,
+ &brw_wm_prog,
+
+ /* Once all the programs are done, we know how large urb entry
+ * sizes need to be and can decide if we need to change the urb
+ * layout.
+ */
+ &brw_curbe_offsets,
+ &brw_recalculate_urb_fence,
+
+
+ &brw_cc_vp,
+ &brw_cc_unit,
+
+ &brw_wm_surfaces, /* must do before samplers */
+ &brw_wm_samplers,
+
+ &brw_wm_unit,
+ &brw_sf_vp,
+ &brw_sf_unit,
+ &brw_vs_unit, /* always required, enabled or not */
+ &brw_clip_unit,
+ &brw_gs_unit,
+
+ /* Command packets:
+ */
+ &brw_invarient_state,
+ &brw_state_base_address,
+ &brw_pipe_control,
+
+ &brw_binding_table_pointers,
+ &brw_blend_constant_color,
+
+ &brw_drawing_rect,
+ &brw_depthbuffer,
+
+ &brw_polygon_stipple,
+ &brw_polygon_stipple_offset,
+
+ &brw_line_stipple,
+
+ &brw_psp_urb_cbs,
+
+
+ NULL, /* brw_constant_buffer */
+};
+
+
+void brw_init_state( struct brw_context *brw )
+{
+ unsigned i;
+
+ brw_init_pools(brw);
+ brw_init_caches(brw);
+
+ brw->state.atoms = MALLOC(sizeof(atoms));
+ brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
+ memcpy(brw->state.atoms, atoms, sizeof(atoms));
+
+ /* Patch in a pointer to the dynamic state atom:
+ */
+ for (i = 0; i < brw->state.nr_atoms; i++)
+ if (brw->state.atoms[i] == NULL)
+ brw->state.atoms[i] = &brw->curbe.tracked_state;
+
+ memcpy(&brw->curbe.tracked_state,
+ &brw_constant_buffer,
+ sizeof(brw_constant_buffer));
+
+ brw->state.dirty.brw = ~0;
+ brw->emit_state_always = 0;
+
+
+}
+
+
+void brw_destroy_state( struct brw_context *brw )
+{
+ if (brw->state.atoms) {
+ FREE(brw->state.atoms);
+ brw->state.atoms = NULL;
+ }
+
+ brw_destroy_caches(brw);
+ brw_destroy_batch_cache(brw);
+ brw_destroy_pools(brw);
+}
+
+/***********************************************************************
+ */
+
+static boolean check_state( const struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ return ((a->brw & b->brw) ||
+ (a->cache & b->cache));
+}
+
+static void accumulate_state( struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ a->brw |= b->brw;
+ a->cache |= b->cache;
+}
+
+
+static void xor_states( struct brw_state_flags *result,
+ const struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ result->brw = a->brw ^ b->brw;
+ result->cache = a->cache ^ b->cache;
+}
+
+
+/***********************************************************************
+ * Emit all state:
+ */
+void brw_validate_state( struct brw_context *brw )
+{
+ struct brw_state_flags *state = &brw->state.dirty;
+ unsigned i;
+
+ if (brw->emit_state_always)
+ state->brw |= ~0;
+
+ if (state->cache == 0 &&
+ state->brw == 0)
+ return;
+
+ if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
+ brw_clear_batch_cache_flush(brw);
+
+ if (BRW_DEBUG) {
+ /* Debug version which enforces various sanity checks on the
+ * state flags which are generated and checked to help ensure
+ * state atoms are ordered correctly in the list.
+ */
+ struct brw_state_flags examined, prev;
+ memset(&examined, 0, sizeof(examined));
+ prev = *state;
+
+ for (i = 0; i < brw->state.nr_atoms; i++) {
+ const struct brw_tracked_state *atom = brw->state.atoms[i];
+ struct brw_state_flags generated;
+
+ assert(atom->dirty.brw ||
+ atom->dirty.cache);
+ assert(atom->update);
+
+ if (check_state(state, &atom->dirty) || atom->always_update) {
+ atom->update( brw );
+
+/* emit_foo(brw); */
+ }
+ if (atom->emit_reloc != NULL)
+ atom->emit_reloc(brw);
+
+ accumulate_state(&examined, &atom->dirty);
+
+ /* generated = (prev ^ state)
+ * if (examined & generated)
+ * fail;
+ */
+ xor_states(&generated, &prev, state);
+ assert(!check_state(&examined, &generated));
+ prev = *state;
+ }
+ }
+ else {
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = brw->state.atoms[i];
+
+ if (check_state(state, &atom->dirty) || atom->always_update)
+ atom->update( brw );
+ if (atom->emit_reloc != NULL)
+ atom->emit_reloc(brw);
+ }
+ }
+
+ memset(state, 0, sizeof(*state));
+}
diff --git a/src/mesa/pipe/i965simple/brw_strings.c b/src/mesa/pipe/i965simple/brw_strings.c
new file mode 100644
index 0000000000..29a41ed1e9
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_strings.c
@@ -0,0 +1,72 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "brw_context.h"
+#include "brw_reg.h"
+
+
+static const char *brw_get_vendor( struct pipe_context *pipe )
+{
+ return "Tungsten Graphics, Inc.";
+}
+
+
+static const char *brw_get_name( struct pipe_context *pipe )
+{
+ static char buffer[128];
+ const char *chipset;
+
+ switch (brw_context(pipe)->pci_id) {
+ case PCI_CHIP_I965_Q:
+ chipset = "Intel(R) 965Q";
+ break;
+ case PCI_CHIP_I965_G:
+ case PCI_CHIP_I965_G_1:
+ chipset = "Intel(R) 965G";
+ break;
+ case PCI_CHIP_I965_GM:
+ chipset = "Intel(R) 965GM";
+ break;
+ case PCI_CHIP_I965_GME:
+ chipset = "Intel(R) 965GME/GLE";
+ break;
+ default:
+ chipset = "unknown";
+ break;
+ }
+
+ sprintf(buffer, "pipe/i965 (chipset: %s)", chipset);
+ return buffer;
+}
+
+
+void
+brw_init_string_functions(struct brw_context *brw)
+{
+ brw->pipe.get_name = brw_get_name;
+ brw->pipe.get_vendor = brw_get_vendor;
+}
diff --git a/src/mesa/pipe/i965simple/brw_structs.h b/src/mesa/pipe/i965simple/brw_structs.h
new file mode 100644
index 0000000000..bbb087e95d
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_structs.h
@@ -0,0 +1,1348 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+#include "pipe/p_compiler.h"
+
+/* Command packets:
+ */
+struct header
+{
+ unsigned length:16;
+ unsigned opcode:16;
+};
+
+
+union header_union
+{
+ struct header bits;
+ unsigned dword;
+};
+
+struct brw_3d_control
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned notify_enable:1;
+ unsigned pad:3;
+ unsigned wc_flush_enable:1;
+ unsigned depth_stall_enable:1;
+ unsigned operation:2;
+ unsigned opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned pad:2;
+ unsigned dest_addr_type:1;
+ unsigned dest_addr:29;
+ } dest;
+
+ unsigned dword2;
+ unsigned dword3;
+};
+
+
+struct brw_3d_primitive
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned pad:2;
+ unsigned topology:5;
+ unsigned indexed:1;
+ unsigned opcode:16;
+ } header;
+
+ unsigned verts_per_instance;
+ unsigned start_vert_location;
+ unsigned instance_count;
+ unsigned start_instance_location;
+ unsigned base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define BRW_FLUSH_READ_CACHE 0x1
+#define BRW_FLUSH_STATE_CACHE 0x2
+#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8
+
+struct brw_mi_flush
+{
+ unsigned flags:4;
+ unsigned pad:12;
+ unsigned opcode:16;
+};
+
+struct brw_vf_statistics
+{
+ unsigned statistics_enable:1;
+ unsigned pad:15;
+ unsigned opcode:16;
+};
+
+
+
+struct brw_binding_table_pointers
+{
+ struct header header;
+ unsigned vs;
+ unsigned gs;
+ unsigned clp;
+ unsigned sf;
+ unsigned wm;
+};
+
+
+struct brw_blend_constant_color
+{
+ struct header header;
+ float blend_constant_color[4];
+};
+
+
+struct brw_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ unsigned pitch:18;
+ unsigned format:3;
+ unsigned pad:4;
+ unsigned depth_offset_disable:1;
+ unsigned tile_walk:1;
+ unsigned tiled_surface:1;
+ unsigned pad2:1;
+ unsigned surface_type:3;
+ } bits;
+ unsigned dword;
+ } dword1;
+
+ unsigned dword2_base_addr;
+
+ union {
+ struct {
+ unsigned pad:1;
+ unsigned mipmap_layout:1;
+ unsigned lod:4;
+ unsigned width:13;
+ unsigned height:13;
+ } bits;
+ unsigned dword;
+ } dword3;
+
+ union {
+ struct {
+ unsigned pad:12;
+ unsigned min_array_element:9;
+ unsigned depth:11;
+ } bits;
+ unsigned dword;
+ } dword4;
+};
+
+struct brw_drawrect
+{
+ struct header header;
+ unsigned xmin:16;
+ unsigned ymin:16;
+ unsigned xmax:16;
+ unsigned ymax:16;
+ unsigned xorg:16;
+ unsigned yorg:16;
+};
+
+
+
+
+struct brw_global_depth_offset_clamp
+{
+ struct header header;
+ float depth_offset_clamp;
+};
+
+struct brw_indexbuffer
+{
+ union {
+ struct
+ {
+ unsigned length:8;
+ unsigned index_format:2;
+ unsigned cut_index_enable:1;
+ unsigned pad:5;
+ unsigned opcode:16;
+ } bits;
+ unsigned dword;
+
+ } header;
+
+ unsigned buffer_start;
+ unsigned buffer_end;
+};
+
+
+struct brw_line_stipple
+{
+ struct header header;
+
+ struct
+ {
+ unsigned pattern:16;
+ unsigned pad:16;
+ } bits0;
+
+ struct
+ {
+ unsigned repeat_count:9;
+ unsigned pad:7;
+ unsigned inverse_repeat_count:16;
+ } bits1;
+};
+
+
+struct brw_pipelined_state_pointers
+{
+ struct header header;
+
+ struct {
+ unsigned pad:5;
+ unsigned offset:27;
+ } vs;
+
+ struct
+ {
+ unsigned enable:1;
+ unsigned pad:4;
+ unsigned offset:27;
+ } gs;
+
+ struct
+ {
+ unsigned enable:1;
+ unsigned pad:4;
+ unsigned offset:27;
+ } clp;
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned offset:27;
+ } sf;
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned offset:27;
+ } wm;
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned offset:27; /* KW: check me! */
+ } cc;
+};
+
+
+struct brw_polygon_stipple_offset
+{
+ struct header header;
+
+ struct {
+ unsigned y_offset:5;
+ unsigned pad:3;
+ unsigned x_offset:5;
+ unsigned pad0:19;
+ } bits0;
+};
+
+
+
+struct brw_polygon_stipple
+{
+ struct header header;
+ unsigned stipple[32];
+};
+
+
+
+struct brw_pipeline_select
+{
+ struct
+ {
+ unsigned pipeline_select:1;
+ unsigned pad:15;
+ unsigned opcode:16;
+ } header;
+};
+
+
+struct brw_pipe_control
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned notify_enable:1;
+ unsigned pad:2;
+ unsigned instruction_state_cache_flush_enable:1;
+ unsigned write_cache_flush_enable:1;
+ unsigned depth_stall_enable:1;
+ unsigned post_sync_operation:2;
+
+ unsigned opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned pad:2;
+ unsigned dest_addr_type:1;
+ unsigned dest_addr:29;
+ } bits1;
+
+ unsigned data0;
+ unsigned data1;
+};
+
+
+struct brw_urb_fence
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned vs_realloc:1;
+ unsigned gs_realloc:1;
+ unsigned clp_realloc:1;
+ unsigned sf_realloc:1;
+ unsigned vfe_realloc:1;
+ unsigned cs_realloc:1;
+ unsigned pad:2;
+ unsigned opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned vs_fence:10;
+ unsigned gs_fence:10;
+ unsigned clp_fence:10;
+ unsigned pad:2;
+ } bits0;
+
+ struct
+ {
+ unsigned sf_fence:10;
+ unsigned vf_fence:10;
+ unsigned cs_fence:10;
+ unsigned pad:2;
+ } bits1;
+};
+
+struct brw_constant_buffer_state /* previously brw_command_streamer */
+{
+ struct header header;
+
+ struct
+ {
+ unsigned nr_urb_entries:3;
+ unsigned pad:1;
+ unsigned urb_entry_size:5;
+ unsigned pad0:23;
+ } bits0;
+};
+
+struct brw_constant_buffer
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned valid:1;
+ unsigned pad:7;
+ unsigned opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned buffer_length:6;
+ unsigned buffer_address:26;
+ } bits0;
+};
+
+struct brw_state_base_address
+{
+ struct header header;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:4;
+ unsigned general_state_address:27;
+ } bits0;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:4;
+ unsigned surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:4;
+ unsigned indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:11;
+ unsigned general_state_upper_bound:20;
+ } bits3;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:11;
+ unsigned indirect_object_state_upper_bound:20;
+ } bits4;
+};
+
+struct brw_state_prefetch
+{
+ struct header header;
+
+ struct
+ {
+ unsigned prefetch_count:3;
+ unsigned pad:3;
+ unsigned prefetch_pointer:26;
+ } bits0;
+};
+
+struct brw_system_instruction_pointer
+{
+ struct header header;
+
+ struct
+ {
+ unsigned pad:4;
+ unsigned system_instruction_pointer:28;
+ } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0
+{
+ unsigned pad0:1;
+ unsigned grf_reg_count:3;
+ unsigned pad1:2;
+ unsigned kernel_start_pointer:26;
+};
+
+struct thread1
+{
+ unsigned ext_halt_exception_enable:1;
+ unsigned sw_exception_enable:1;
+ unsigned mask_stack_exception_enable:1;
+ unsigned timeout_exception_enable:1;
+ unsigned illegal_op_exception_enable:1;
+ unsigned pad0:3;
+ unsigned depth_coef_urb_read_offset:6; /* WM only */
+ unsigned pad1:2;
+ unsigned floating_point_mode:1;
+ unsigned thread_priority:1;
+ unsigned binding_table_entry_count:8;
+ unsigned pad3:5;
+ unsigned single_program_flow:1;
+};
+
+struct thread2
+{
+ unsigned per_thread_scratch_space:4;
+ unsigned pad0:6;
+ unsigned scratch_space_base_pointer:22;
+};
+
+
+struct thread3
+{
+ unsigned dispatch_grf_start_reg:4;
+ unsigned urb_entry_read_offset:6;
+ unsigned pad0:1;
+ unsigned urb_entry_read_length:6;
+ unsigned pad1:1;
+ unsigned const_urb_entry_read_offset:6;
+ unsigned pad2:1;
+ unsigned const_urb_entry_read_length:6;
+ unsigned pad3:1;
+};
+
+
+
+struct brw_clip_unit_state
+{
+ struct thread0 thread0;
+ struct
+ {
+ unsigned pad0:7;
+ unsigned sw_exception_enable:1;
+ unsigned pad1:3;
+ unsigned mask_stack_exception_enable:1;
+ unsigned pad2:1;
+ unsigned illegal_op_exception_enable:1;
+ unsigned pad3:2;
+ unsigned floating_point_mode:1;
+ unsigned thread_priority:1;
+ unsigned binding_table_entry_count:8;
+ unsigned pad4:5;
+ unsigned single_program_flow:1;
+ } thread1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:9;
+ unsigned gs_output_stats:1; /* not always */
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:1; /* may be less */
+ unsigned pad3:6;
+ } thread4;
+
+ struct
+ {
+ unsigned pad0:13;
+ unsigned clip_mode:3;
+ unsigned userclip_enable_flags:8;
+ unsigned userclip_must_clip:1;
+ unsigned pad1:1;
+ unsigned guard_band_enable:1;
+ unsigned viewport_z_clip_enable:1;
+ unsigned viewport_xy_clip_enable:1;
+ unsigned vertex_position_space:1;
+ unsigned api_mode:1;
+ unsigned pad2:1;
+ } clip5;
+
+ struct
+ {
+ unsigned pad0:5;
+ unsigned clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ float viewport_xmin;
+ float viewport_xmax;
+ float viewport_ymin;
+ float viewport_ymax;
+};
+
+
+
+struct brw_cc_unit_state
+{
+ struct
+ {
+ unsigned pad0:3;
+ unsigned bf_stencil_pass_depth_pass_op:3;
+ unsigned bf_stencil_pass_depth_fail_op:3;
+ unsigned bf_stencil_fail_op:3;
+ unsigned bf_stencil_func:3;
+ unsigned bf_stencil_enable:1;
+ unsigned pad1:2;
+ unsigned stencil_write_enable:1;
+ unsigned stencil_pass_depth_pass_op:3;
+ unsigned stencil_pass_depth_fail_op:3;
+ unsigned stencil_fail_op:3;
+ unsigned stencil_func:3;
+ unsigned stencil_enable:1;
+ } cc0;
+
+
+ struct
+ {
+ unsigned bf_stencil_ref:8;
+ unsigned stencil_write_mask:8;
+ unsigned stencil_test_mask:8;
+ unsigned stencil_ref:8;
+ } cc1;
+
+
+ struct
+ {
+ unsigned logicop_enable:1;
+ unsigned pad0:10;
+ unsigned depth_write_enable:1;
+ unsigned depth_test_function:3;
+ unsigned depth_test:1;
+ unsigned bf_stencil_write_mask:8;
+ unsigned bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct
+ {
+ unsigned pad0:8;
+ unsigned alpha_test_func:3;
+ unsigned alpha_test:1;
+ unsigned blend_enable:1;
+ unsigned ia_blend_enable:1;
+ unsigned pad1:1;
+ unsigned alpha_test_format:1;
+ unsigned pad2:16;
+ } cc3;
+
+ struct
+ {
+ unsigned pad0:5;
+ unsigned cc_viewport_state_offset:27;
+ } cc4;
+
+ struct
+ {
+ unsigned pad0:2;
+ unsigned ia_dest_blend_factor:5;
+ unsigned ia_src_blend_factor:5;
+ unsigned ia_blend_function:3;
+ unsigned statistics_enable:1;
+ unsigned logicop_func:4;
+ unsigned pad1:11;
+ unsigned dither_enable:1;
+ } cc5;
+
+ struct
+ {
+ unsigned clamp_post_alpha_blend:1;
+ unsigned clamp_pre_alpha_blend:1;
+ unsigned clamp_range:2;
+ unsigned pad0:11;
+ unsigned y_dither_offset:2;
+ unsigned x_dither_offset:2;
+ unsigned dest_blend_factor:5;
+ unsigned src_blend_factor:5;
+ unsigned blend_function:3;
+ } cc6;
+
+ struct {
+ union {
+ float f;
+ ubyte ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+
+
+struct brw_sf_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:10;
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:6;
+ unsigned pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned front_winding:1;
+ unsigned viewport_transform:1;
+ unsigned pad0:3;
+ unsigned sf_viewport_state_offset:27;
+ } sf5;
+
+ struct
+ {
+ unsigned pad0:9;
+ unsigned dest_org_vbias:4;
+ unsigned dest_org_hbias:4;
+ unsigned scissor:1;
+ unsigned disable_2x2_trifilter:1;
+ unsigned disable_zero_pix_trifilter:1;
+ unsigned point_rast_rule:2;
+ unsigned line_endcap_aa_region_width:2;
+ unsigned line_width:4;
+ unsigned fast_scissor_disable:1;
+ unsigned cull_mode:2;
+ unsigned aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ unsigned point_size:11;
+ unsigned use_point_size_state:1;
+ unsigned subpixel_precision:1;
+ unsigned sprite_point:1;
+ unsigned pad0:11;
+ unsigned trifan_pv:2;
+ unsigned linestrip_pv:2;
+ unsigned tristrip_pv:2;
+ unsigned line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+
+struct brw_gs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:10;
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:1;
+ unsigned pad3:6;
+ } thread4;
+
+ struct
+ {
+ unsigned sampler_count:3;
+ unsigned pad0:2;
+ unsigned sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ unsigned max_vp_index:4;
+ unsigned pad0:26;
+ unsigned reorder_enable:1;
+ unsigned pad1:1;
+ } gs6;
+};
+
+
+struct brw_vs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:10;
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:4;
+ unsigned pad3:3;
+ } thread4;
+
+ struct
+ {
+ unsigned sampler_count:3;
+ unsigned pad0:2;
+ unsigned sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ unsigned vs_enable:1;
+ unsigned vert_cache_disable:1;
+ unsigned pad0:30;
+ } vs6;
+};
+
+
+struct brw_wm_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ unsigned stats_enable:1;
+ unsigned pad0:1;
+ unsigned sampler_count:3;
+ unsigned sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ unsigned enable_8_pix:1;
+ unsigned enable_16_pix:1;
+ unsigned enable_32_pix:1;
+ unsigned pad0:7;
+ unsigned legacy_global_depth_bias:1;
+ unsigned line_stipple:1;
+ unsigned depth_offset:1;
+ unsigned polygon_stipple:1;
+ unsigned line_aa_region_width:2;
+ unsigned line_endcap_aa_region_width:2;
+ unsigned early_depth_test:1;
+ unsigned thread_dispatch_enable:1;
+ unsigned program_uses_depth:1;
+ unsigned program_computes_depth:1;
+ unsigned program_uses_killpixel:1;
+ unsigned legacy_line_rast: 1;
+ unsigned pad1:1;
+ unsigned max_threads:6;
+ unsigned pad2:1;
+ } wm5;
+
+ float global_depth_offset_constant;
+ float global_depth_offset_scale;
+};
+
+struct brw_sampler_default_color {
+ float color[4];
+};
+
+struct brw_sampler_state
+{
+
+ struct
+ {
+ unsigned shadow_function:3;
+ unsigned lod_bias:11;
+ unsigned min_filter:3;
+ unsigned mag_filter:3;
+ unsigned mip_filter:2;
+ unsigned base_level:5;
+ unsigned pad:1;
+ unsigned lod_preclamp:1;
+ unsigned default_color_mode:1;
+ unsigned pad0:1;
+ unsigned disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned r_wrap_mode:3;
+ unsigned t_wrap_mode:3;
+ unsigned s_wrap_mode:3;
+ unsigned pad:3;
+ unsigned max_lod:10;
+ unsigned min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned default_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ unsigned pad:19;
+ unsigned max_aniso:3;
+ unsigned chroma_key_mode:1;
+ unsigned chroma_key_index:2;
+ unsigned chroma_key_enable:1;
+ unsigned monochrome_filter_width:3;
+ unsigned monochrome_filter_height:3;
+ } ss3;
+};
+
+
+struct brw_clipper_viewport
+{
+ float xmin;
+ float xmax;
+ float ymin;
+ float ymax;
+};
+
+struct brw_cc_viewport
+{
+ float min_depth;
+ float max_depth;
+};
+
+struct brw_sf_viewport
+{
+ struct {
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+ } viewport;
+
+ struct {
+ short xmin;
+ short ymin;
+ short xmax;
+ short ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct brw_surface_state
+{
+ struct {
+ unsigned cube_pos_z:1;
+ unsigned cube_neg_z:1;
+ unsigned cube_pos_y:1;
+ unsigned cube_neg_y:1;
+ unsigned cube_pos_x:1;
+ unsigned cube_neg_x:1;
+ unsigned pad:4;
+ unsigned mipmap_layout_mode:1;
+ unsigned vert_line_stride_ofs:1;
+ unsigned vert_line_stride:1;
+ unsigned color_blend:1;
+ unsigned writedisable_blue:1;
+ unsigned writedisable_green:1;
+ unsigned writedisable_red:1;
+ unsigned writedisable_alpha:1;
+ unsigned surface_format:9;
+ unsigned data_return_format:1;
+ unsigned pad0:1;
+ unsigned surface_type:3;
+ } ss0;
+
+ struct {
+ unsigned base_addr;
+ } ss1;
+
+ struct {
+ unsigned pad:2;
+ unsigned mip_count:4;
+ unsigned width:13;
+ unsigned height:13;
+ } ss2;
+
+ struct {
+ unsigned tile_walk:1;
+ unsigned tiled_surface:1;
+ unsigned pad:1;
+ unsigned pitch:18;
+ unsigned depth:11;
+ } ss3;
+
+ struct {
+ unsigned pad:19;
+ unsigned min_array_elt:9;
+ unsigned min_lod:4;
+ } ss4;
+};
+
+
+
+struct brw_vertex_buffer_state
+{
+ struct {
+ unsigned pitch:11;
+ unsigned pad:15;
+ unsigned access_type:1;
+ unsigned vb_index:5;
+ } vb0;
+
+ unsigned start_addr;
+ unsigned max_index;
+#if 1
+ unsigned instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define BRW_VBP_MAX 17
+
+struct brw_vb_array_state {
+ struct header header;
+ struct brw_vertex_buffer_state vb[BRW_VBP_MAX];
+};
+
+
+struct brw_vertex_element_state
+{
+ struct
+ {
+ unsigned src_offset:11;
+ unsigned pad:5;
+ unsigned src_format:9;
+ unsigned pad0:1;
+ unsigned valid:1;
+ unsigned vertex_buffer_index:5;
+ } ve0;
+
+ struct
+ {
+ unsigned dst_offset:8;
+ unsigned pad:8;
+ unsigned vfcomponent3:4;
+ unsigned vfcomponent2:4;
+ unsigned vfcomponent1:4;
+ unsigned vfcomponent0:4;
+ } ve1;
+};
+
+#define BRW_VEP_MAX 18
+
+struct brw_vertex_element_packet {
+ struct header header;
+ struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct brw_urb_immediate {
+ unsigned opcode:4;
+ unsigned offset:6;
+ unsigned swizzle_control:2;
+ unsigned pad:1;
+ unsigned allocate:1;
+ unsigned used:1;
+ unsigned complete:1;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct brw_instruction
+{
+ struct
+ {
+ unsigned opcode:7;
+ unsigned pad:1;
+ unsigned access_mode:1;
+ unsigned mask_control:1;
+ unsigned dependency_control:2;
+ unsigned compression_control:2;
+ unsigned thread_control:2;
+ unsigned predicate_control:4;
+ unsigned predicate_inverse:1;
+ unsigned execution_size:3;
+ unsigned destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ unsigned pad0:2;
+ unsigned debug_control:1;
+ unsigned saturate:1;
+ } header;
+
+ union {
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned src1_reg_file:2;
+ unsigned src1_reg_type:3;
+ unsigned pad:1;
+ unsigned dest_subreg_nr:5;
+ unsigned dest_reg_nr:8;
+ unsigned dest_horiz_stride:2;
+ unsigned dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned pad:6;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ unsigned dest_horiz_stride:2;
+ unsigned dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned src1_reg_file:2;
+ unsigned src1_reg_type:3;
+ unsigned pad0:1;
+ unsigned dest_writemask:4;
+ unsigned dest_subreg_nr:1;
+ unsigned dest_reg_nr:8;
+ unsigned pad1:2;
+ unsigned dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned pad0:6;
+ unsigned dest_writemask:4;
+ int dest_indirect_offset:6;
+ unsigned dest_subreg_nr:3;
+ unsigned pad1:2;
+ unsigned dest_address_mode:1;
+ } ia16;
+ } bits1;
+
+
+ union {
+ struct
+ {
+ unsigned src0_subreg_nr:5;
+ unsigned src0_reg_nr:8;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_horiz_stride:2;
+ unsigned src0_width:3;
+ unsigned src0_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad:6;
+ } da1;
+
+ struct
+ {
+ int src0_indirect_offset:10;
+ unsigned src0_subreg_nr:3;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_horiz_stride:2;
+ unsigned src0_width:3;
+ unsigned src0_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad:6;
+ } ia1;
+
+ struct
+ {
+ unsigned src0_swz_x:2;
+ unsigned src0_swz_y:2;
+ unsigned src0_subreg_nr:1;
+ unsigned src0_reg_nr:8;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_swz_z:2;
+ unsigned src0_swz_w:2;
+ unsigned pad0:1;
+ unsigned src0_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad1:6;
+ } da16;
+
+ struct
+ {
+ unsigned src0_swz_x:2;
+ unsigned src0_swz_y:2;
+ int src0_indirect_offset:6;
+ unsigned src0_subreg_nr:3;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_swz_z:2;
+ unsigned src0_swz_w:2;
+ unsigned pad0:1;
+ unsigned src0_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad1:6;
+ } ia16;
+
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ unsigned src1_subreg_nr:5;
+ unsigned src1_reg_nr:8;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned pad:1;
+ unsigned src1_horiz_stride:2;
+ unsigned src1_width:3;
+ unsigned src1_vert_stride:4;
+ unsigned pad0:7;
+ } da1;
+
+ struct
+ {
+ unsigned src1_swz_x:2;
+ unsigned src1_swz_y:2;
+ unsigned src1_subreg_nr:1;
+ unsigned src1_reg_nr:8;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned pad0:1;
+ unsigned src1_swz_z:2;
+ unsigned src1_swz_w:2;
+ unsigned pad1:1;
+ unsigned src1_vert_stride:4;
+ unsigned pad2:7;
+ } da16;
+
+ struct
+ {
+ int src1_indirect_offset:10;
+ unsigned src1_subreg_nr:3;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned pad0:1;
+ unsigned src1_horiz_stride:2;
+ unsigned src1_width:3;
+ unsigned src1_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad1:6;
+ } ia1;
+
+ struct
+ {
+ unsigned src1_swz_x:2;
+ unsigned src1_swz_y:2;
+ int src1_indirect_offset:6;
+ unsigned src1_subreg_nr:3;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned pad0:1;
+ unsigned src1_swz_z:2;
+ unsigned src1_swz_w:2;
+ unsigned pad1:1;
+ unsigned src1_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ int jump_count:16; /* note: signed */
+ unsigned pop_count:4;
+ unsigned pad0:12;
+ } if_else;
+
+ struct {
+ unsigned function:4;
+ unsigned int_type:1;
+ unsigned precision:1;
+ unsigned saturate:1;
+ unsigned data_type:1;
+ unsigned pad0:8;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } math;
+
+ struct {
+ unsigned binding_table_index:8;
+ unsigned sampler:4;
+ unsigned return_format:2;
+ unsigned msg_type:2;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } sampler;
+
+ struct brw_urb_immediate urb;
+
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:4;
+ unsigned msg_type:2;
+ unsigned target_cache:2;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } dp_read;
+
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:3;
+ unsigned pixel_scoreboard_clear:1;
+ unsigned msg_type:3;
+ unsigned send_commit_msg:1;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } dp_write;
+
+ struct {
+ unsigned pad:16;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } generic;
+
+ int d;
+ unsigned ud;
+ } bits3;
+};
+
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_surface.c b/src/mesa/pipe/i965simple/brw_surface.c
new file mode 100644
index 0000000000..b5a0318047
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_surface.c
@@ -0,0 +1,397 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "brw_blit.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+
+
+#define CLIP_TILE \
+ do { \
+ if (x >= ps->width) \
+ return; \
+ if (y >= ps->height) \
+ return; \
+ if (x + w > ps->width) \
+ w = ps->width - x; \
+ if (y + h > ps->height) \
+ h = ps->height -y; \
+ } while(0)
+
+
+/**
+ * Note: this is exactly like a8r8g8b8_get_tile() in sp_surface.c
+ * Share it someday.
+ */
+static void
+brw_get_tile_rgba(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h, float *p)
+{
+ const unsigned *src
+ = ((const unsigned *) (ps->map + ps->offset))
+ + y * ps->pitch + x;
+ unsigned i, j;
+ unsigned w0 = w;
+
+ CLIP_TILE;
+
+ switch (ps->format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++) {
+ const unsigned pixel = src[j];
+ pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff);
+ pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff);
+ pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff);
+ pRow[3] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff);
+ pRow += 4;
+ }
+ src += ps->pitch;
+ p += w0 * 4;
+ }
+ break;
+ case PIPE_FORMAT_S8Z24_UNORM:
+ {
+ const float scale = 1.0f / (float) 0xffffff;
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++) {
+ const unsigned pixel = src[j];
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (pixel & 0xffffff) * scale;
+ pRow += 4;
+ }
+ src += ps->pitch;
+ p += w0 * 4;
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+static void
+brw_put_tile_rgba(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h, const float *p)
+{
+ /* TODO */
+ assert(0);
+}
+
+
+/*
+ * XXX note: same as code in sp_surface.c
+ */
+static void
+brw_get_tile(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ void *p, int dst_stride)
+{
+ const uint cpp = ps->cpp;
+ const uint w0 = w;
+ const ubyte *pSrc;
+ ubyte *pDest;
+ uint i;
+
+ assert(ps->map);
+
+ CLIP_TILE;
+
+ if (dst_stride == 0) {
+ dst_stride = w0 * cpp;
+ }
+
+ pSrc = ps->map + ps->offset + (y * ps->pitch + x) * cpp;
+ pDest = (ubyte *) p;
+
+ for (i = 0; i < h; i++) {
+ memcpy(pDest, pSrc, w0 * cpp);
+ pDest += dst_stride;
+ pSrc += ps->pitch * cpp;
+ }
+}
+
+
+/*
+ * XXX note: same as code in sp_surface.c
+ */
+static void
+brw_put_tile(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ const void *p, int src_stride)
+{
+ const uint cpp = ps->cpp;
+ const uint w0 = w;
+ const ubyte *pSrc;
+ ubyte *pDest;
+ uint i;
+
+ assert(ps->map);
+
+ CLIP_TILE;
+
+ if (src_stride == 0) {
+ src_stride = w0 * cpp;
+ }
+
+ pSrc = (const ubyte *) p;
+ pDest = ps->map + ps->offset + (y * ps->pitch + x) * cpp;
+
+ for (i = 0; i < h; i++) {
+ memcpy(pDest, pSrc, w0 * cpp);
+ pDest += ps->pitch * cpp;
+ pSrc += src_stride;
+ }
+}
+
+
+/*
+ * XXX note: same as code in sp_surface.c
+ */
+static struct pipe_surface *
+brw_get_tex_surface(struct pipe_context *pipe,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice)
+{
+ struct brw_texture *tex = (struct brw_texture *)pt;
+ struct pipe_surface *ps;
+ unsigned offset; /* in bytes */
+
+ offset = tex->level_offset[level];
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ offset += tex->image_offset[level][face] * pt->cpp;
+ }
+ else if (pt->target == PIPE_TEXTURE_3D) {
+ offset += tex->image_offset[level][zslice] * pt->cpp;
+ }
+ else {
+ assert(face == 0);
+ assert(zslice == 0);
+ }
+
+ ps = pipe->winsys->surface_alloc(pipe->winsys);
+ if (ps) {
+ assert(ps->format);
+ assert(ps->refcount);
+ pipe->winsys->buffer_reference(pipe->winsys, &ps->buffer, tex->buffer);
+ ps->format = pt->format;
+ ps->cpp = pt->cpp;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->pitch = tex->pitch;
+ ps->offset = offset;
+ }
+ return ps;
+}
+
+/*
+ * XXX Move this into core Mesa?
+ */
+static void
+_mesa_copy_rect(ubyte * dst,
+ unsigned cpp,
+ unsigned dst_pitch,
+ unsigned dst_x,
+ unsigned dst_y,
+ unsigned width,
+ unsigned height,
+ const ubyte * src,
+ unsigned src_pitch,
+ unsigned src_x,
+ unsigned src_y)
+{
+ unsigned i;
+
+ dst_pitch *= cpp;
+ src_pitch *= cpp;
+ dst += dst_x * cpp;
+ src += src_x * cpp;
+ dst += dst_y * dst_pitch;
+ src += src_y * dst_pitch;
+ width *= cpp;
+
+ if (width == dst_pitch && width == src_pitch)
+ memcpy(dst, src, height * width);
+ else {
+ for (i = 0; i < height; i++) {
+ memcpy(dst, src, width);
+ dst += dst_pitch;
+ src += src_pitch;
+ }
+ }
+}
+
+/* Upload data to a rectangular sub-region. Lots of choices how to do this:
+ *
+ * - memcpy by span to current destination
+ * - upload data as new buffer and blit
+ *
+ * Currently always memcpy.
+ */
+static void
+brw_surface_data(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ const void *src, unsigned src_pitch,
+ unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+ _mesa_copy_rect(pipe_surface_map(dst) + dst->offset,
+ dst->cpp,
+ dst->pitch,
+ dstx, dsty, width, height, src, src_pitch, srcx, srcy);
+
+ pipe_surface_unmap(dst);
+}
+
+
+/* Assumes all values are within bounds -- no checking at this level -
+ * do it higher up if required.
+ */
+static void
+brw_surface_copy(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface *src,
+ unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+ assert(dst != src);
+ assert(dst->cpp == src->cpp);
+
+ if (0) {
+ _mesa_copy_rect(pipe_surface_map(dst) + dst->offset,
+ dst->cpp,
+ dst->pitch,
+ dstx, dsty,
+ width, height,
+ pipe_surface_map(src) + src->offset,
+ src->pitch,
+ srcx, srcy);
+
+ pipe_surface_unmap(src);
+ pipe_surface_unmap(dst);
+ }
+ else {
+ brw_copy_blit(brw_context(pipe),
+ dst->cpp,
+ (short) src->pitch, src->buffer, src->offset, FALSE,
+ (short) dst->pitch, dst->buffer, dst->offset, FALSE,
+ (short) srcx, (short) srcy, (short) dstx, (short) dsty,
+ (short) width, (short) height, PIPE_LOGICOP_COPY);
+ }
+}
+
+/* Fill a rectangular sub-region. Need better logic about when to
+ * push buffers into AGP - will currently do so whenever possible.
+ */
+static ubyte *
+get_pointer(struct pipe_surface *dst, unsigned x, unsigned y)
+{
+ return dst->map + (y * dst->pitch + x) * dst->cpp;
+}
+
+
+static void
+brw_surface_fill(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height, unsigned value)
+{
+ if (0) {
+ unsigned i, j;
+
+ (void)pipe_surface_map(dst);
+
+ switch (dst->cpp) {
+ case 1: {
+ ubyte *row = get_pointer(dst, dstx, dsty);
+ for (i = 0; i < height; i++) {
+ memset(row, value, width);
+ row += dst->pitch;
+ }
+ }
+ break;
+ case 2: {
+ ushort *row = (ushort *) get_pointer(dst, dstx, dsty);
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ row[j] = (ushort) value;
+ row += dst->pitch;
+ }
+ }
+ break;
+ case 4: {
+ unsigned *row = (unsigned *) get_pointer(dst, dstx, dsty);
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ row[j] = value;
+ row += dst->pitch;
+ }
+ }
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ else {
+ brw_fill_blit(brw_context(pipe),
+ dst->cpp,
+ (short) dst->pitch,
+ dst->buffer, dst->offset, FALSE,
+ (short) dstx, (short) dsty,
+ (short) width, (short) height,
+ value);
+ }
+}
+
+void
+brw_init_surface_functions(struct brw_context *brw)
+{
+ brw->pipe.get_tex_surface = brw_get_tex_surface;
+ brw->pipe.get_tile = brw_get_tile;
+ brw->pipe.put_tile = brw_put_tile;
+ brw->pipe.get_tile_rgba = brw_get_tile_rgba;
+ brw->pipe.put_tile_rgba = brw_put_tile_rgba;
+
+ brw->pipe.surface_data = brw_surface_data;
+ brw->pipe.surface_copy = brw_surface_copy;
+ brw->pipe.surface_fill = brw_surface_fill;
+}
diff --git a/src/mesa/pipe/i965simple/brw_tex.c b/src/mesa/pipe/i965simple/brw_tex.c
new file mode 100644
index 0000000000..e9ff67c3f4
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_tex.c
@@ -0,0 +1,65 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+#if 0
+void brw_FrameBufferTexInit( struct brw_context *brw )
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+ struct intel_region *region = intel->intelScreen->front_region;
+ struct gl_texture_object *obj;
+ struct gl_texture_image *img;
+
+ intel->frame_buffer_texobj = obj =
+ ctx->Driver.NewTextureObject( ctx, (unsigned) -1, GL_TEXTURE_2D );
+
+ obj->MinFilter = GL_NEAREST;
+ obj->MagFilter = GL_NEAREST;
+
+ img = ctx->Driver.NewTextureImage( ctx );
+
+ _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, img,
+ region->pitch, region->height, 1, 0,
+ region->cpp == 4 ? GL_RGBA : GL_RGB );
+
+ _mesa_set_tex_image( obj, GL_TEXTURE_2D, 0, img );
+}
+
+void brw_FrameBufferTexDestroy( struct brw_context *brw )
+{
+ brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx,
+ brw->intel.frame_buffer_texobj );
+}
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.c b/src/mesa/pipe/i965simple/brw_tex_layout.c
new file mode 100644
index 0000000000..b9514be0c2
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_tex_layout.c
@@ -0,0 +1,359 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+#include "brw_tex_layout.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_winsys.h"
+
+#include "brw_context.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+#if 0
+unsigned intel_compressed_alignment(unsigned internalFormat)
+{
+ unsigned alignment = 4;
+
+ switch (internalFormat) {
+ case GL_COMPRESSED_RGB_FXT1_3DFX:
+ case GL_COMPRESSED_RGBA_FXT1_3DFX:
+ alignment = 8;
+ break;
+
+ default:
+ break;
+ }
+
+ return alignment;
+}
+#endif
+
+static unsigned minify( unsigned d )
+{
+ return MAX2(1, d>>1);
+}
+
+
+static boolean brw_miptree_layout(struct pipe_context *, struct brw_texture *);
+
+static void intel_miptree_set_image_offset(struct brw_texture *tex,
+ unsigned level,
+ unsigned img,
+ unsigned x, unsigned y)
+{
+ struct pipe_texture *pt = &tex->base;
+ if (img == 0 && level == 0)
+ assert(x == 0 && y == 0);
+ assert(img < tex->nr_images[level]);
+
+ tex->image_offset[level][img] = (x + y * tex->pitch) * pt->cpp;
+}
+
+static void intel_miptree_set_level_info(struct brw_texture *tex,
+ unsigned level,
+ unsigned nr_images,
+ unsigned x, unsigned y,
+ unsigned w, unsigned h, unsigned d)
+{
+ struct pipe_texture *pt = &tex->base;
+
+ assert(level < PIPE_MAX_TEXTURE_LEVELS);
+
+ pt->width[level] = w;
+ pt->height[level] = h;
+ pt->depth[level] = d;
+
+ tex->level_offset[level] = (x + y * tex->pitch) * pt->cpp;
+ tex->nr_images[level] = nr_images;
+
+ /*
+ DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+ level, w, h, d, x, y, tex->level_offset[level]);
+ */
+
+ /* Not sure when this would happen, but anyway:
+ */
+ if (tex->image_offset[level]) {
+ FREE(tex->image_offset[level]);
+ tex->image_offset[level] = NULL;
+ }
+
+ assert(nr_images);
+ assert(!tex->image_offset[level]);
+
+ tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned));
+ tex->image_offset[level][0] = 0;
+}
+
+static void i945_miptree_layout_2d(struct brw_texture *tex)
+{
+ struct pipe_texture *pt = &tex->base;
+ unsigned align_h = 2, align_w = 4;
+ unsigned level;
+ unsigned x = 0;
+ unsigned y = 0;
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+
+ tex->pitch = pt->width[0];
+
+#if 0
+ if (pt->compressed) {
+ align_w = intel_compressed_alignment(pt->internal_format);
+ tex->pitch = ALIGN(pt->width[0], align_w);
+ }
+#endif
+
+ /* May need to adjust pitch to accomodate the placement of
+ * the 2nd mipmap. This occurs when the alignment
+ * constraints of mipmap placement push the right edge of the
+ * 2nd mipmap out past the width of its parent.
+ */
+ if (pt->first_level != pt->last_level) {
+ unsigned mip1_width;
+
+ if (pt->compressed) {
+ mip1_width = ALIGN(minify(pt->width[0]), align_w)
+ + ALIGN(minify(minify(pt->width[0])), align_w);
+ } else {
+ mip1_width = ALIGN(minify(pt->width[0]), align_w)
+ + minify(minify(pt->width[0]));
+ }
+
+ if (mip1_width > tex->pitch) {
+ tex->pitch = mip1_width;
+ }
+ }
+
+ /* Pitch must be a whole number of dwords, even though we
+ * express it in texels.
+ */
+ tex->pitch = ALIGN(tex->pitch * pt->cpp, 4) / pt->cpp;
+ tex->total_height = 0;
+
+ for ( level = pt->first_level ; level <= pt->last_level ; level++ ) {
+ unsigned img_height;
+
+ intel_miptree_set_level_info(tex, level, 1, x, y, width,
+ height, 1);
+
+ if (pt->compressed)
+ img_height = MAX2(1, height/4);
+ else
+ img_height = ALIGN(height, align_h);
+
+
+ /* Because the images are packed better, the final offset
+ * might not be the maximal one:
+ */
+ tex->total_height = MAX2(tex->total_height, y + img_height);
+
+ /* Layout_below: step right after second mipmap.
+ */
+ if (level == pt->first_level + 1) {
+ x += ALIGN(width, align_w);
+ }
+ else {
+ y += img_height;
+ }
+
+ width = minify(width);
+ height = minify(height);
+ }
+}
+
+static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture *tex)
+{
+ struct pipe_texture *pt = &tex->base;
+ /* XXX: these vary depending on image format:
+ */
+/* int align_w = 4; */
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_3D: {
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned depth = pt->depth[0];
+ unsigned pack_x_pitch, pack_x_nr;
+ unsigned pack_y_pitch;
+ unsigned level;
+ unsigned align_h = 2;
+ unsigned align_w = 4;
+
+ tex->total_height = 0;
+#if 0
+ if (pt->compressed) {
+ align_w = intel_compressed_alignment(pt->internal_format);
+ pt->pitch = ALIGN(width, align_w);
+ pack_y_pitch = (height + 3) / 4;
+ } else
+#endif
+ {
+ tex->pitch = ALIGN(pt->width[0] * pt->cpp, 4) / pt->cpp;
+ pack_y_pitch = ALIGN(pt->height[0], align_h);
+ }
+
+ pack_x_pitch = tex->pitch;
+ pack_x_nr = 1;
+
+ for ( level = pt->first_level ; level <= pt->last_level ; level++ ) {
+ unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6;
+ int x = 0;
+ int y = 0;
+ int q, j;
+
+ intel_miptree_set_level_info(tex, level, nr_images,
+ 0, tex->total_height,
+ width, height, depth);
+
+ for (q = 0; q < nr_images;) {
+ for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+ intel_miptree_set_image_offset(tex, level, q, x, y);
+ x += pack_x_pitch;
+ }
+
+ x = 0;
+ y += pack_y_pitch;
+ }
+
+
+ tex->total_height += y;
+ width = minify(width);
+ height = minify(height);
+ depth = minify(depth);
+
+ if (pt->compressed) {
+ pack_y_pitch = (height + 3) / 4;
+
+ if (pack_x_pitch > ALIGN(width, align_w)) {
+ pack_x_pitch = ALIGN(width, align_w);
+ pack_x_nr <<= 1;
+ }
+ } else {
+ if (pack_x_pitch > 4) {
+ pack_x_pitch >>= 1;
+ pack_x_nr <<= 1;
+ assert(pack_x_pitch * pack_x_nr <= tex->pitch);
+ }
+
+ if (pack_y_pitch > 2) {
+ pack_y_pitch >>= 1;
+ pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+ }
+ }
+
+ }
+ break;
+ }
+
+ default:
+ i945_miptree_layout_2d(tex);
+ break;
+ }
+#if 0
+ PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+ pt->pitch,
+ pt->total_height,
+ pt->cpp,
+ pt->pitch * pt->total_height * pt->cpp );
+#endif
+
+ return TRUE;
+}
+
+void
+brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
+{
+ struct brw_texture *tex = REALLOC(*pt, sizeof(struct pipe_texture),
+ sizeof(struct brw_texture));
+
+ if (tex) {
+ struct brw_context *brw = brw_context(pipe);
+
+ memset(&tex->base + 1, 0,
+ sizeof(struct brw_texture) - sizeof(struct pipe_texture));
+
+ if (brw_miptree_layout(pipe, tex)) {
+ tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, 0, 0);
+
+ if (tex->buffer)
+ pipe->winsys->buffer_data(pipe->winsys, tex->buffer,
+ tex->pitch * tex->base.cpp *
+ tex->total_height, NULL,
+ PIPE_BUFFER_USAGE_PIXEL);
+ }
+
+ if (!tex->buffer) {
+ FREE(tex);
+ tex = NULL;
+ }
+ }
+
+ *pt = &tex->base;
+}
+
+void
+brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt)
+{
+ if (!*pt)
+ return;
+
+ /*
+ DBG("%s %p refcount will be %d\n",
+ __FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
+ */
+ if (--(*pt)->refcount <= 0) {
+ struct brw_texture *tex = (struct brw_texture *)*pt;
+ uint i;
+
+ /*
+ DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
+ */
+
+ pipe->winsys->buffer_reference(pipe->winsys, &tex->buffer, NULL);
+
+ for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+ if (tex->image_offset[i])
+ free(tex->image_offset[i]);
+
+ free(tex);
+ }
+ *pt = NULL;
+}
diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.h b/src/mesa/pipe/i965simple/brw_tex_layout.h
new file mode 100644
index 0000000000..15e275058a
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_tex_layout.h
@@ -0,0 +1,15 @@
+#ifndef BRW_TEX_LAYOUT_H
+#define BRW_TEX_LAYOUT_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_context;
+struct pipe_texture;
+
+extern void
+brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt);
+
+extern void
+brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt);
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_urb.c b/src/mesa/pipe/i965simple/brw_urb.c
new file mode 100644
index 0000000000..64255b67c4
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_urb.c
@@ -0,0 +1,197 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#define VS 0
+#define GS 1
+#define CLP 2
+#define SF 3
+#define CS 4
+
+/* XXX: Are the min_entry_size numbers useful?
+ * XXX: Verify min_nr_entries, esp for VS.
+ * XXX: Verify SF min_entry_size.
+ */
+static const struct {
+ unsigned min_nr_entries;
+ unsigned preferred_nr_entries;
+ unsigned min_entry_size;
+ unsigned max_entry_size;
+} limits[CS+1] = {
+ { 8, 32, 1, 5 }, /* vs */
+ { 4, 8, 1, 5 }, /* gs */
+ { 6, 8, 1, 5 }, /* clp */
+ { 1, 8, 1, 12 }, /* sf */
+ { 1, 4, 1, 32 } /* cs */
+};
+
+
+static boolean check_urb_layout( struct brw_context *brw )
+{
+ brw->urb.vs_start = 0;
+ brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
+ brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
+ brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
+ brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
+
+ return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256;
+}
+
+/* Most minimal update, forces re-emit of URB fence packet after GS
+ * unit turned on/off.
+ */
+static void recalculate_urb_fence( struct brw_context *brw )
+{
+ unsigned csize = brw->curbe.total_size;
+ unsigned vsize = brw->vs.prog_data->urb_entry_size;
+ unsigned sfsize = brw->sf.prog_data->urb_entry_size;
+
+ if (csize < limits[CS].min_entry_size)
+ csize = limits[CS].min_entry_size;
+
+ if (vsize < limits[VS].min_entry_size)
+ vsize = limits[VS].min_entry_size;
+
+ if (sfsize < limits[SF].min_entry_size)
+ sfsize = limits[SF].min_entry_size;
+
+ if (brw->urb.vsize < vsize ||
+ brw->urb.sfsize < sfsize ||
+ brw->urb.csize < csize ||
+ (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize ||
+ brw->urb.sfsize > brw->urb.sfsize ||
+ brw->urb.csize > brw->urb.csize))) {
+
+
+ brw->urb.csize = csize;
+ brw->urb.sfsize = sfsize;
+ brw->urb.vsize = vsize;
+
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
+
+ if (!check_urb_layout(brw)) {
+ brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+
+ brw->urb.constrained = 1;
+
+ if (!check_urb_layout(brw)) {
+ /* This is impossible, given the maximal sizes of urb
+ * entries and the values for minimum nr of entries
+ * provided above.
+ */
+ fprintf(stderr, "couldn't calculate URB layout!\n");
+ exit(1);
+ }
+
+ if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+ printf("URB CONSTRAINED\n");
+ }
+ else
+ brw->urb.constrained = 0;
+
+ if (BRW_DEBUG & DEBUG_URB)
+ printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+ brw->urb.vs_start,
+ brw->urb.gs_start,
+ brw->urb.clip_start,
+ brw->urb.sf_start,
+ brw->urb.cs_start,
+ 256);
+
+ brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+ }
+}
+
+
+const struct brw_tracked_state brw_recalculate_urb_fence = {
+ .dirty = {
+ .brw = BRW_NEW_CURBE_OFFSETS,
+ .cache = (CACHE_NEW_VS_PROG |
+ CACHE_NEW_SF_PROG)
+ },
+ .update = recalculate_urb_fence
+};
+
+
+
+
+
+void brw_upload_urb_fence(struct brw_context *brw)
+{
+ struct brw_urb_fence uf;
+ memset(&uf, 0, sizeof(uf));
+
+ uf.header.opcode = CMD_URB_FENCE;
+ uf.header.length = sizeof(uf)/4-2;
+ uf.header.vs_realloc = 1;
+ uf.header.gs_realloc = 1;
+ uf.header.clp_realloc = 1;
+ uf.header.sf_realloc = 1;
+ uf.header.vfe_realloc = 1;
+ uf.header.cs_realloc = 1;
+
+ /* The ordering below is correct, not the layout in the
+ * instruction.
+ *
+ * There are 256 urb reg pairs in total.
+ */
+ uf.bits0.vs_fence = brw->urb.gs_start;
+ uf.bits0.gs_fence = brw->urb.clip_start;
+ uf.bits0.clp_fence = brw->urb.sf_start;
+ uf.bits1.sf_fence = brw->urb.cs_start;
+ uf.bits1.cs_fence = 256;
+
+ BRW_BATCH_STRUCT(brw, &uf);
+}
+
+
+#if 0
+const struct brw_tracked_state brw_urb_fence = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_URB_FENCE | BRW_NEW_PSP,
+ .cache = 0
+ },
+ .update = brw_upload_urb_fence
+};
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_util.c b/src/mesa/pipe/i965simple/brw_util.c
new file mode 100644
index 0000000000..42391d7c8c
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_util.c
@@ -0,0 +1,104 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_util.h"
+#include "brw_defines.h"
+
+#include "pipe/p_defines.h"
+
+unsigned brw_count_bits( unsigned val )
+{
+ unsigned i;
+ for (i = 0; val ; val >>= 1)
+ if (val & 1)
+ i++;
+ return i;
+}
+
+
+unsigned brw_translate_blend_equation( int mode )
+{
+ switch (mode) {
+ case PIPE_BLEND_ADD:
+ return BRW_BLENDFUNCTION_ADD;
+ case PIPE_BLEND_MIN:
+ return BRW_BLENDFUNCTION_MIN;
+ case PIPE_BLEND_MAX:
+ return BRW_BLENDFUNCTION_MAX;
+ case PIPE_BLEND_SUBTRACT:
+ return BRW_BLENDFUNCTION_SUBTRACT;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
+ default:
+ assert(0);
+ return BRW_BLENDFUNCTION_ADD;
+ }
+}
+
+unsigned brw_translate_blend_factor( int factor )
+{
+ switch(factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return BRW_BLENDFACTOR_ZERO;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return BRW_BLENDFACTOR_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_ONE:
+ return BRW_BLENDFACTOR_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return BRW_BLENDFACTOR_SRC_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return BRW_BLENDFACTOR_INV_SRC_COLOR;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return BRW_BLENDFACTOR_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return BRW_BLENDFACTOR_INV_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return BRW_BLENDFACTOR_INV_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return BRW_BLENDFACTOR_DST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return BRW_BLENDFACTOR_INV_DST_ALPHA;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return BRW_BLENDFACTOR_CONST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return BRW_BLENDFACTOR_INV_CONST_COLOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return BRW_BLENDFACTOR_CONST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+ default:
+ assert(0);
+ return BRW_BLENDFACTOR_ZERO;
+ }
+}
diff --git a/src/mesa/pipe/i965simple/brw_util.h b/src/mesa/pipe/i965simple/brw_util.h
new file mode 100644
index 0000000000..d60e5934db
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_util.h
@@ -0,0 +1,43 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_UTIL_H
+#define BRW_UTIL_H
+
+#include "pipe/p_state.h"
+
+extern unsigned brw_count_bits( unsigned val );
+extern unsigned brw_translate_blend_factor( int factor );
+extern unsigned brw_translate_blend_equation( int mode );
+
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_vs.c b/src/mesa/pipe/i965simple/brw_vs.c
new file mode 100644
index 0000000000..33c6624214
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_vs.c
@@ -0,0 +1,130 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_util.h"
+#include "brw_state.h"
+
+
+static void do_vs_prog( struct brw_context *brw,
+ const struct brw_vertex_program *vp,
+ struct brw_vs_prog_key *key )
+{
+ unsigned program_size;
+ const unsigned *program;
+ struct brw_vs_compile c;
+
+ memset(&c, 0, sizeof(c));
+ memcpy(&c.key, key, sizeof(*key));
+
+ brw_init_compile(&c.func);
+ c.vp = vp;
+
+ c.prog_data.outputs_written = vp->program.num_outputs;
+ c.prog_data.inputs_read = vp->program.num_inputs;
+
+#if 0
+ if (c.key.copy_edgeflag) {
+ c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE;
+ c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
+ }
+#endif
+
+ /* Emit GEN4 code.
+ */
+ brw_vs_emit(&c);
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /*
+ */
+ brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG],
+ &c.key,
+ sizeof(c.key),
+ program,
+ program_size,
+ &c.prog_data,
+ &brw->vs.prog_data);
+}
+
+
+static void brw_upload_vs_prog( struct brw_context *brw )
+{
+ struct brw_vs_prog_key key;
+ const struct brw_vertex_program *vp = brw->attribs.VertexProgram;
+
+ assert(vp);
+
+ memset(&key, 0, sizeof(key));
+
+ /* Just upload the program verbatim for now. Always send it all
+ * the inputs it asks for, whether they are varying or not.
+ */
+ key.program_string_id = vp->id;
+ key.nr_userclip = brw->attribs.Clip.nr;
+ key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL);
+
+#if 0
+ /* BRW_NEW_METAOPS
+ */
+ if (brw->metaops.active)
+ key.know_w_is_one = 1;
+#endif
+
+ /* Make an early check for the key.
+ */
+ if (brw_search_cache(&brw->cache[BRW_VS_PROG],
+ &key, sizeof(key),
+ &brw->vs.prog_data,
+ &brw->vs.prog_gs_offset))
+ return;
+
+ do_vs_prog(brw, vp, &key);
+}
+
+
+/* See brw_vs.c:
+ */
+const struct brw_tracked_state brw_vs_prog = {
+ .dirty = {
+#if 0
+ .brw = BRW_NEW_VS | BRW_NEW_METAOPS,
+#endif
+ .brw = BRW_NEW_VS,
+ .cache = 0
+ },
+ .update = brw_upload_vs_prog
+};
diff --git a/src/mesa/pipe/i965simple/brw_vs.h b/src/mesa/pipe/i965simple/brw_vs.h
new file mode 100644
index 0000000000..0e58f043b0
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_vs.h
@@ -0,0 +1,82 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_VS_H
+#define BRW_VS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+struct brw_vs_prog_key {
+ unsigned program_string_id;
+ unsigned nr_userclip:4;
+ unsigned copy_edgeflag:1;
+ unsigned know_w_is_one:1;
+ unsigned pad:26;
+};
+
+
+struct brw_vs_compile {
+ struct brw_compile func;
+ struct brw_vs_prog_key key;
+ struct brw_vs_prog_data prog_data;
+
+ struct brw_vertex_program *vp;
+
+ unsigned nr_inputs;
+
+ unsigned first_output;
+ unsigned nr_outputs;
+
+ unsigned first_tmp;
+ unsigned last_tmp;
+
+ struct brw_reg r0;
+ struct brw_reg r1;
+ struct brw_reg regs[12][128];
+ struct brw_reg tmp;
+ struct brw_reg stack;
+
+ struct {
+ boolean used_in_src;
+ struct brw_reg reg;
+ } output_regs[128];
+
+ struct brw_reg userplane[6];
+
+};
+
+void brw_vs_emit( struct brw_vs_compile *c );
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_vs_constval.c b/src/mesa/pipe/i965simple/brw_vs_constval.c
new file mode 100644
index 0000000000..de43e72c1d
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_vs_constval.c
@@ -0,0 +1,223 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_vs.h"
+
+#if 0
+/* Component is active if it may diverge from [0,0,0,1]. Undef values
+ * are promoted to [0,0,0,1] for the purposes of this analysis.
+ */
+struct tracker {
+ boolean twoside;
+ ubyte active[PROGRAM_OUTPUT+1][128];
+ unsigned size_masks[4];
+};
+
+
+static void set_active_component( struct tracker *t,
+ unsigned file,
+ unsigned index,
+ ubyte active )
+{
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ t->active[file][index] |= active;
+
+ default:
+ break;
+ }
+}
+
+static void set_active( struct tracker *t,
+ struct prog_dst_register dst,
+ unsigned active )
+{
+ set_active_component( t, dst.File, dst.Index, active & dst.WriteMask );
+}
+
+
+static ubyte get_active_component( struct tracker *t,
+ unsigned file,
+ unsigned index,
+ unsigned component,
+ ubyte swz )
+{
+ switch (swz) {
+ case SWIZZLE_ZERO:
+ return component < 3 ? 0 : (1<<component);
+ case SWIZZLE_ONE:
+ return component == 3 ? 0 : (1<<component);
+ default:
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ return t->active[file][index] & (1<<component);
+ default:
+ return 1 << component;
+ }
+ }
+}
+
+
+static ubyte get_active( struct tracker *t,
+ struct prog_src_register src )
+{
+ unsigned i;
+ ubyte active = src.NegateBase; /* NOTE! */
+
+ if (src.RelAddr)
+ return 0xf;
+
+ for (i = 0; i < 4; i++)
+ active |= get_active_component(t, src.File, src.Index, i,
+ GET_SWZ(src.Swizzle, i));
+
+ return active;
+}
+
+static ubyte get_output_size( struct tracker *t,
+ unsigned idx )
+{
+ ubyte active = t->active[PROGRAM_OUTPUT][idx];
+ if (active & (1<<3)) return 4;
+ if (active & (1<<2)) return 3;
+ if (active & (1<<1)) return 2;
+ if (active & (1<<0)) return 1;
+ return 0;
+}
+
+/* Note the potential copying that occurs in the setup program:
+ */
+static void calc_sizes( struct tracker *t )
+{
+ unsigned i;
+
+ if (t->twoside) {
+ t->active[PROGRAM_OUTPUT][VERT_RESULT_COL0] |=
+ t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC0];
+
+ t->active[PROGRAM_OUTPUT][VERT_RESULT_COL1] |=
+ t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC1];
+ }
+
+ for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
+ switch (get_output_size(t, i)) {
+ case 4: t->size_masks[4-1] |= 1<<i;
+ case 3: t->size_masks[3-1] |= 1<<i;
+ case 2: t->size_masks[2-1] |= 1<<i;
+ case 1: t->size_masks[1-1] |= 1<<i;
+ break;
+ }
+ }
+}
+
+static ubyte szflag[4+1] = {
+ 0,
+ 0x1,
+ 0x3,
+ 0x7,
+ 0xf
+};
+
+/* Pull a size out of the packed array:
+ */
+static unsigned get_input_size(struct brw_context *brw,
+ unsigned attr)
+{
+ unsigned sizes_dword = brw->vb.info.sizes[attr/16];
+ unsigned sizes_bits = (sizes_dword>>((attr%16)*2)) & 0x3;
+ return sizes_bits + 1;
+/* return brw->vb.inputs[attr].glarray->Size; */
+}
+
+/* Calculate sizes of vertex program outputs. Size is the largest
+ * component index which might vary from [0,0,0,1]
+ */
+static void calc_wm_input_sizes( struct brw_context *brw )
+{
+ /* BRW_NEW_VERTEX_PROGRAM */
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *)brw->vertex_program;
+ /* BRW_NEW_INPUT_DIMENSIONS */
+ struct tracker t;
+ unsigned insn;
+ unsigned i;
+
+ memset(&t, 0, sizeof(t));
+
+ /* _NEW_LIGHT */
+ if (brw->attribs.Light->Model.TwoSide)
+ t.twoside = 1;
+
+ for (i = 0; i < PIPE_ATTRIB_MAX; i++)
+ if (vp->program.Base.InputsRead & (1<<i))
+ set_active_component(&t, PROGRAM_INPUT, i,
+ szflag[get_input_size(brw, i)]);
+
+ for (insn = 0; insn < vp->program.Base.NumInstructions; insn++) {
+ struct prog_instruction *inst = &vp->program.Base.Instructions[insn];
+
+ switch (inst->Opcode) {
+ case OPCODE_ARL:
+ break;
+
+ case OPCODE_MOV:
+ set_active(&t, inst->DstReg, get_active(&t, inst->SrcReg[0]));
+ break;
+
+ default:
+ set_active(&t, inst->DstReg, 0xf);
+ break;
+ }
+ }
+
+ calc_sizes(&t);
+
+ if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) != 0) {
+ memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks));
+ brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS;
+ }
+}
+
+const struct brw_tracked_state brw_wm_input_sizes = {
+ .dirty = {
+ .mesa = _NEW_LIGHT,
+ .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
+ .cache = 0
+ },
+ .update = calc_wm_input_sizes
+};
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_vs_emit.c b/src/mesa/pipe/i965simple/brw_vs_emit.c
new file mode 100644
index 0000000000..4121eae632
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_vs_emit.c
@@ -0,0 +1,1243 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_context.h"
+#include "brw_vs.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
+
+struct brw_prog_info {
+ unsigned num_temps;
+ unsigned num_addrs;
+
+ unsigned writes_psize;
+
+ unsigned pos_idx;
+ unsigned result_edge_idx;
+ unsigned edge_flag_idx;
+ unsigned psize_idx;
+};
+
+/* Do things as simply as possible. Allocate and populate all regs
+ * ahead of time.
+ */
+static void brw_vs_alloc_regs( struct brw_vs_compile *c,
+ struct brw_prog_info *info )
+{
+ unsigned i, reg = 0, mrf;
+ unsigned nr_params;
+
+ /* r0 -- reserved as usual
+ */
+ c->r0 = brw_vec8_grf(reg, 0); reg++;
+
+ /* User clip planes from curbe:
+ */
+ if (c->key.nr_userclip) {
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+ }
+
+ /* Deal with curbe alignment:
+ */
+ reg += ((6+c->key.nr_userclip+3)/4)*2;
+ }
+
+ /* Vertex program parameters from curbe:
+ */
+ nr_params = c->vp->program.num_inputs; /*FIXME: i think this is wrong... */
+ for (i = 0; i < nr_params; i++) {
+ c->regs[TGSI_FILE_INPUT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ }
+ reg += (nr_params+1)/2;
+
+ c->prog_data.curb_read_length = reg - 1;
+
+
+
+ /* Allocate input regs:
+ */
+ c->nr_inputs = 0;
+ for (i = 0; i < PIPE_ATTRIB_MAX; i++) {
+ if (c->prog_data.inputs_read & (1<<i)) {
+ c->nr_inputs++;
+ c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
+
+ /* Allocate outputs: TODO: could organize the non-position outputs
+ * to go straight into message regs.
+ */
+ c->nr_outputs = 0;
+ c->first_output = reg;
+ mrf = 4;
+ for (i = 0; i < c->vp->program.num_outputs; i++) {
+ if (c->prog_data.outputs_written & (1<<i)) {
+ c->nr_outputs++;
+#if 0
+ if (i == VERT_RESULT_HPOS) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ else if (i == VERT_RESULT_PSIZ) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ mrf++; /* just a placeholder? XXX fix later stages & remove this */
+ }
+ else {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
+ mrf++;
+ }
+#else
+ /* for now stuff everything in grf */
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+#endif
+ }
+ }
+
+ /* Allocate program temporaries:
+ */
+ for (i = 0; i < info->num_temps; i++) {
+ c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+
+ /* Address reg(s). Don't try to use the internal address reg until
+ * deref time.
+ */
+ for (i = 0; i < info->num_addrs; i++) {
+ c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
+ reg,
+ 0,
+ BRW_REGISTER_TYPE_D,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XXXX,
+ TGSI_WRITEMASK_X);
+ reg++;
+ }
+
+ for (i = 0; i < 128; i++) {
+ if (c->output_regs[i].used_in_src) {
+ c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+ reg += 2;
+
+
+ /* Some opcodes need an internal temporary:
+ */
+ c->first_tmp = reg;
+ c->last_tmp = reg; /* for allocation purposes */
+
+ /* Each input reg holds data from two vertices. The
+ * urb_read_length is the number of registers read from *each*
+ * vertex urb, so is half the amount:
+ */
+ c->prog_data.urb_read_length = (c->nr_inputs+1)/2;
+
+ c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
+ c->prog_data.total_grf = reg;
+}
+
+
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+ struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+static void release_tmps( struct brw_vs_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+static void unalias1( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if (dst.file == arg0.file && dst.nr == arg0.nr) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0);
+ brw_MOV(p, dst, tmp);
+ }
+ else {
+ func(c, dst, arg0);
+ }
+}
+
+static void unalias2( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+ (dst.file == arg1.file && dst.nr == arg1.nr)) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0, arg1);
+ brw_MOV(p, dst, tmp);
+ }
+ else {
+ func(c, dst, arg0, arg1);
+ }
+}
+
+static void emit_sop( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ unsigned cond)
+{
+ brw_push_insn_state(p);
+ brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst, brw_imm_f(1.0f));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, brw_imm_f(0.0f));
+ brw_pop_insn_state(p);
+}
+
+static void emit_seq( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
+}
+
+static void emit_sne( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
+}
+static void emit_slt( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
+}
+
+static void emit_sle( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
+}
+
+static void emit_sgt( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
+}
+
+static void emit_sge( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
+}
+
+static void emit_max( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg1, arg0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+static void emit_min( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+static void emit_math1( struct brw_vs_compile *c,
+ unsigned function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ unsigned precision)
+{
+ /* There are various odd behaviours with SEND on the simulator. In
+ * addition there are documented issues with the fact that the GEN4
+ * processor doesn't do dependency control properly on SEND
+ * results. So, on balance, this kludge to get around failures
+ * with writemasked math results looks like it might be necessary
+ * whether that turns out to be a simulator bug or not:
+ */
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_math(p,
+ tmp,
+ function,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ arg0,
+ BRW_MATH_DATA_SCALAR,
+ precision);
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+static void emit_math2( struct brw_vs_compile *c,
+ unsigned function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ unsigned precision)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_MOV(p, brw_message_reg(3), arg1);
+
+ brw_math(p,
+ tmp,
+ function,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ arg0,
+ BRW_MATH_DATA_SCALAR,
+ precision);
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+
+static void emit_exp_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) {
+ struct brw_reg tmp = get_tmp(c);
+ struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
+
+ /* tmp_d = floor(arg0.x) */
+ brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
+
+ /* result[0] = 2.0 ^ tmp */
+
+ /* Adjust exponent for floating point:
+ * exp += 127
+ */
+ brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127));
+
+ /* Install exponent and sign.
+ * Excess drops off the edge:
+ */
+ brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X),
+ tmp_d, brw_imm_d(23));
+
+ release_tmp(c, tmp);
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) {
+ /* result[1] = arg0.x - floor(arg0.x) */
+ brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0));
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
+ /* As with the LOG instruction, we might be better off just
+ * doing a taylor expansion here, seeing as we have to do all
+ * the prep work.
+ *
+ * If mathbox partial precision is too low, consider also:
+ * result[3] = result[0] * EXP(result[1])
+ */
+ emit_math1(c,
+ BRW_MATH_FUNCTION_EXP,
+ brw_writemask(dst, TGSI_WRITEMASK_Z),
+ brw_swizzle1(arg0, 0),
+ BRW_MATH_PRECISION_PARTIAL);
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
+ /* result[3] = 1.0; */
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1));
+ }
+}
+
+
+static void emit_log_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+ boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp) {
+ tmp = get_tmp(c);
+ tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ }
+
+ /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
+ * according to spec:
+ *
+ * These almost look likey they could be joined up, but not really
+ * practical:
+ *
+ * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
+ * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
+ */
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1U<<31)-1));
+
+ brw_SHR(p,
+ brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
+ tmp_ud,
+ brw_imm_ud(23));
+
+ brw_ADD(p,
+ brw_writemask(tmp, TGSI_WRITEMASK_X),
+ retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
+ brw_imm_d(-127));
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1<<23)-1));
+
+ brw_OR(p,
+ brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
+ tmp_ud,
+ brw_imm_ud(127<<23));
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
+ /* result[2] = result[0] + LOG2(result[1]); */
+
+ /* Why bother? The above is just a hint how to do this with a
+ * taylor series. Maybe we *should* use a taylor series as by
+ * the time all the above has been done it's almost certainly
+ * quicker than calling the mathbox, even with low precision.
+ *
+ * Options are:
+ * - result[0] + mathbox.LOG2(result[1])
+ * - mathbox.LOG2(arg0.x)
+ * - result[0] + inline_taylor_approx(result[1])
+ */
+ emit_math1(c,
+ BRW_MATH_FUNCTION_LOG,
+ brw_writemask(tmp, TGSI_WRITEMASK_Z),
+ brw_swizzle1(tmp, 1),
+ BRW_MATH_PRECISION_FULL);
+
+ brw_ADD(p,
+ brw_writemask(tmp, TGSI_WRITEMASK_Z),
+ brw_swizzle1(tmp, 2),
+ brw_swizzle1(tmp, 0));
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
+ /* result[3] = 1.0; */
+ brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1));
+ }
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+
+
+/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
+ */
+static void emit_dst_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1)
+{
+ struct brw_compile *p = &c->func;
+
+ /* There must be a better way to do this:
+ */
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X)
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0));
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y)
+ brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1);
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z)
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0);
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W)
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1);
+}
+
+static void emit_xpd( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg t,
+ struct brw_reg u)
+{
+ brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3));
+ brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
+}
+
+
+
+static void emit_lit_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+ struct brw_reg tmp = dst;
+ boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0));
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1));
+
+ /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
+ * to get all channels active inside the IF. In the clipping code
+ * we run with NoMask, so it's not an option and we can use
+ * BRW_EXECUTE_1 for all comparisions.
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
+ if_insn = brw_IF(p, BRW_EXECUTE_8);
+ {
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0));
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
+ brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ emit_math2(c,
+ BRW_MATH_FUNCTION_POW,
+ brw_writemask(dst, TGSI_WRITEMASK_Z),
+ brw_swizzle1(tmp, 2),
+ brw_swizzle1(arg0, 3),
+ BRW_MATH_PRECISION_PARTIAL);
+ }
+
+ brw_ENDIF(p, if_insn);
+}
+
+
+
+
+
+/* TODO: relative addressing!
+ */
+static struct brw_reg get_reg( struct brw_vs_compile *c,
+ unsigned file,
+ unsigned index )
+{
+ switch (file) {
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ case TGSI_FILE_CONSTANT:
+ assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
+ return c->regs[TGSI_FILE_CONSTANT][index];
+ case TGSI_FILE_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case TGSI_FILE_NULL: /* undef values */
+ return brw_null_reg();
+
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
+
+static struct brw_reg deref( struct brw_vs_compile *c,
+ struct brw_reg arg,
+ int offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = vec4(get_tmp(c));
+ struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
+ unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+ struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+ {
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ /* This is pretty clunky - load the address register twice and
+ * fetch each 4-dword value in turn. There must be a way to do
+ * this in a single pass, but I couldn't get it to work.
+ */
+ brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+ brw_MOV(p, tmp, indirect);
+
+ brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+ brw_MOV(p, suboffset(tmp, 4), indirect);
+
+ brw_pop_insn_state(p);
+ }
+
+ return vec8(tmp);
+}
+
+
+static void emit_arl( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_RNDD(p, tmp, arg0);
+ brw_MUL(p, dst, tmp, brw_imm_d(16));
+
+ if (need_tmp)
+ release_tmp(c, tmp);
+}
+
+
+/* Will return mangled results for SWZ op. The emit_swz() function
+ * ignores this result and recalculates taking extended swizzles into
+ * account.
+ */
+static struct brw_reg get_arg( struct brw_vs_compile *c,
+ struct tgsi_src_register *src )
+{
+ struct brw_reg reg;
+
+ if (src->File == TGSI_FILE_NULL)
+ return brw_null_reg();
+
+#if 0
+ if (src->RelAddr)
+ reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
+ else
+#endif
+ reg = get_reg(c, src->File, src->Index);
+
+ /* Convert 3-bit swizzle to 2-bit.
+ */
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX,
+ src->SwizzleY,
+ src->SwizzleZ,
+ src->SwizzleW);
+
+ /* Note this is ok for non-swizzle instructions:
+ */
+ reg.negate = src->Negate ? 1 : 0;
+
+ return reg;
+}
+
+
+static struct brw_reg get_dst( struct brw_vs_compile *c,
+ struct tgsi_dst_register dst )
+{
+ struct brw_reg reg = get_reg(c, dst.File, dst.Index);
+
+ reg.dw1.bits.writemask = dst.WriteMask;
+
+ return reg;
+}
+
+
+
+
+static void emit_swz( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct tgsi_src_register src )
+{
+ struct brw_compile *p = &c->func;
+ unsigned zeros_mask = 0;
+ unsigned ones_mask = 0;
+ unsigned src_mask = 0;
+ ubyte src_swz[4];
+ boolean need_tmp = (src.Negate &&
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+ struct brw_reg tmp = dst;
+ unsigned i;
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ for (i = 0; i < 4; i++) {
+ if (dst.dw1.bits.writemask & (1<<i)) {
+ ubyte s = 0;
+ switch(i) {
+ case 0:
+ s = src.SwizzleX;
+ break;
+ s = src.SwizzleY;
+ case 1:
+ break;
+ s = src.SwizzleZ;
+ case 2:
+ break;
+ s = src.SwizzleW;
+ case 3:
+ break;
+ }
+ switch (s) {
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
+ src_mask |= 1<<i;
+ src_swz[i] = s;
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zeros_mask |= 1<<i;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ ones_mask |= 1<<i;
+ break;
+ }
+ }
+ }
+
+ /* Do src first, in case dst aliases src:
+ */
+ if (src_mask) {
+ struct brw_reg arg0;
+
+#if 0
+ if (src.RelAddr)
+ arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
+ else
+#endif
+ arg0 = get_reg(c, src.File, src.Index);
+
+ arg0 = brw_swizzle(arg0,
+ src_swz[0], src_swz[1],
+ src_swz[2], src_swz[3]);
+
+ brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
+ }
+
+ if (zeros_mask)
+ brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
+
+ if (ones_mask)
+ brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
+
+ if (src.Negate)
+ brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+
+/* Post-vertex-program processing. Send the results to the URB.
+ */
+static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg m0 = brw_message_reg(0);
+ struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx];
+ struct brw_reg ndc;
+
+ if (c->key.copy_edgeflag) {
+ brw_MOV(p,
+ get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx),
+ get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx));
+ }
+
+
+ /* Build ndc coords? TODO: Shortcircuit when w is known to be one.
+ */
+ if (!c->key.know_w_is_one) {
+ ndc = get_tmp(c);
+ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+ brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc);
+ }
+ else {
+ ndc = pos;
+ }
+
+ /* This includes the workaround for -ve rhw, so is no longer an
+ * optional step:
+ */
+ if (info->writes_psize ||
+ c->key.nr_userclip ||
+ !c->key.know_w_is_one)
+ {
+ struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ unsigned i;
+
+ brw_MOV(p, header1, brw_imm_ud(0));
+
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ if (info->writes_psize) {
+ struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx];
+ brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W),
+ brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+ brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1,
+ brw_imm_ud(0x7ff<<8));
+ }
+
+
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
+ brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+
+ /* i965 clipping workaround:
+ * 1) Test for -ve rhw
+ * 2) If set,
+ * set ndc = (0,0,0,0)
+ * set ucp[6] = 1
+ *
+ * Later, clipping will detect ucp[6] and ensure the primitive is
+ * clipped against all fixed planes.
+ */
+ if (!c->key.know_w_is_one) {
+ brw_CMP(p,
+ vec8(brw_null_reg()),
+ BRW_CONDITIONAL_L,
+ brw_swizzle1(ndc, 3),
+ brw_imm_f(0));
+
+ brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6));
+ brw_MOV(p, ndc, brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ brw_set_access_mode(p, BRW_ALIGN_1); /* why? */
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ release_tmp(c, header1);
+ }
+ else {
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+ }
+
+
+ /* Emit the (interleaved) headers for the two vertices - an 8-reg
+ * of zeros followed by two sets of NDC coordinates:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, offset(m0, 2), ndc);
+ brw_MOV(p, offset(m0, 3), pos);
+
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 0, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ c->nr_outputs + 3, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ 0, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+
+}
+
+static void
+post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
+{
+ struct tgsi_parse_context parse;
+ const struct tgsi_token *tokens = c->vp->program.tokens;
+ struct brw_instruction *brw_inst1, *brw_inst2;
+ const struct tgsi_full_instruction *inst1, *inst2;
+ int offset;
+ tgsi_parse_init(&parse, tokens);
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+ if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+#if 0
+ inst1 = &parse.FullToken.FullInstruction;
+ brw_inst1 = inst1->Data;
+ switch (inst1->Opcode) {
+ case TGSI_OPCODE_CAL:
+ case TGSI_OPCODE_BRA:
+ target_insn = inst1->BranchTarget;
+ inst2 = &c->vp->program.Base.Instructions[target_insn];
+ brw_inst2 = inst2->Data;
+ offset = brw_inst2 - brw_inst1;
+ brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+ break;
+ case TGSI_OPCODE_END:
+ offset = end_inst - brw_inst1;
+ brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+ break;
+ default:
+ break;
+ }
+#endif
+ }
+ }
+ tgsi_parse_free(&parse);
+}
+
+/* Emit the fragment program instructions here.
+ */
+void brw_vs_emit(struct brw_vs_compile *c)
+{
+#define MAX_IFSN 32
+ struct brw_compile *p = &c->func;
+ unsigned insn, if_insn = 0;
+ struct brw_instruction *end_inst;
+ struct brw_instruction *if_inst[MAX_IFSN];
+ struct brw_indirect stack_index = brw_indirect(0, 0);
+ struct tgsi_parse_context parse;
+ const struct tgsi_token *tokens = c->vp->program.tokens;
+ struct brw_prog_info prog_info;
+ unsigned allocated_registers = 0;
+ memset(&prog_info, 0, sizeof(struct brw_prog_info));
+
+ unsigned index;
+ unsigned file;
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+#if 0
+ tgsi_parse_init(&parse, tokens);
+ /* Message registers can't be read, so copy the output into GRF register
+ if they are used in source registers */
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+ unsigned i;
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION: {
+ const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
+ struct prog_src_register *src = &inst->SrcReg[i];
+ unsigned index = src->Index;
+ unsigned file = src->File;
+ if (file == TGSI_FILE_OUTPUT)
+ c->output_regs[index].used_in_src = TRUE;
+ }
+ break;
+ default:
+ /* nothing */
+ break;
+ }
+ }
+#endif
+
+ tgsi_parse_init(&parse, tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION: {
+ struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
+ /* FIXME: fill in brw_prog_info based on declarations here */
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE: {
+
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION: {
+ if (!allocated_registers) {
+ /* first instruction after declerations.
+ * know that we know used vars allocate
+ * registers */
+ brw_vs_alloc_regs(c, &prog_info);
+ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+ allocated_registers = 1;
+ }
+ }
+ break;
+ }
+ }
+#if 0
+ struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+ struct brw_reg args[3], dst;
+ unsigned i;
+
+ /* Get argument regs. SWZ is special and does this itself.
+ */
+ inst->Data = &p->store[p->nr_insn];
+ if (inst->Opcode != TGSI_OPCODE_SWZ)
+ for (i = 0; i < 3; i++) {
+ struct prog_src_register *src = &inst->SrcReg[i];
+ index = src->Index;
+ file = src->File;
+ if (file == TGSI_FILE_OUTPUT&&c->output_regs[index].used_in_src)
+ args[i] = c->output_regs[index].reg;
+ else
+ args[i] = get_arg(c, src);
+ }
+
+ /* Get dest regs. Note that it is possible for a reg to be both
+ * dst and arg, given the static allocation of registers. So
+ * care needs to be taken emitting multi-operation instructions.
+ */
+ index = inst->DstReg.Index;
+ file = inst->DstReg.File;
+ if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
+ dst = c->output_regs[index].reg;
+ else
+ dst = get_dst(c, inst->DstReg);
+
+ switch (inst->Opcode) {
+ case TGSI_OPCODE_ABS:
+ brw_MOV(p, dst, brw_abs(args[0]));
+ break;
+ case TGSI_OPCODE_ADD:
+ brw_ADD(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DP3:
+ brw_DP3(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DP4:
+ brw_DP4(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DPH:
+ brw_DPH(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DST:
+ unalias2(c, dst, args[0], args[1], emit_dst_noalias);
+ break;
+ case TGSI_OPCODE_EXP:
+ unalias1(c, dst, args[0], emit_exp_noalias);
+ break;
+ case TGSI_OPCODE_EX2:
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_ARL:
+ emit_arl(c, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ brw_RNDD(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FRC:
+ brw_FRC(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ unalias1(c, dst, args[0], emit_log_noalias);
+ break;
+ case TGSI_OPCODE_LG2:
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_LIT:
+ unalias1(c, dst, args[0], emit_lit_noalias);
+ break;
+ case TGSI_OPCODE_MAD:
+ brw_MOV(p, brw_acc_reg(), args[2]);
+ brw_MAC(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MAX:
+ emit_max(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MIN:
+ emit_min(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MOV:
+ brw_MOV(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_MUL:
+ brw_MUL(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_POW:
+ emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RCP:
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RSQ:
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+
+ case TGSI_OPCODE_SEQ:
+ emit_seq(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SNE:
+ emit_sne(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGE:
+ emit_sge(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGT:
+ emit_sgt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLT:
+ emit_slt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLE:
+ emit_sle(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SUB:
+ brw_ADD(p, dst, args[0], negate(args[1]));
+ break;
+ case TGSI_OPCODE_SWZ:
+ /* The args[0] value can't be used here as it won't have
+ * correctly encoded the full swizzle:
+ */
+ emit_swz(c, dst, inst->SrcReg[0] );
+ break;
+ case TGSI_OPCODE_XPD:
+ emit_xpd(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_IF:
+ assert(if_insn < MAX_IFSN);
+ if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ break;
+ case TGSI_OPCODE_ELSE:
+ if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ assert(if_insn > 0);
+ brw_ENDIF(p, if_inst[--if_insn]);
+ break;
+ case TGSI_OPCODE_BRA:
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_set_predicate_control_flag_value(p, 0xff);
+ break;
+ case TGSI_OPCODE_CAL:
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(4));
+ inst->Data = &p->store[p->nr_insn];
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case TGSI_OPCODE_RET:
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ case TGSI_OPCODE_END:
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case TGSI_OPCODE_BGNSUB:
+ case TGSI_OPCODE_ENDSUB:
+ break;
+ default:
+ printf("Unsupport opcode %d in vertex shader\n", inst->Opcode);
+ break;
+ }
+
+ if (inst->DstReg.File == TGSI_FILE_OUTPUT
+ && inst->DstReg.Index != VERT_RESULT_HPOS
+ && c->output_regs[inst->DstReg.Index].used_in_src)
+ brw_MOV(p, get_dst(c, inst->DstReg), dst);
+
+ release_tmps(c);
+ }
+#endif
+
+ end_inst = &p->store[p->nr_insn];
+ emit_vertex_write(c, &prog_info);
+ post_vs_emit(c, end_inst);
+ tgsi_parse_free(&parse);
+}
diff --git a/src/mesa/pipe/i965simple/brw_vs_state.c b/src/mesa/pipe/i965simple/brw_vs_state.c
new file mode 100644
index 0000000000..7d6fb383b9
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_vs_state.c
@@ -0,0 +1,102 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#include "pipe/p_util.h"
+
+static void upload_vs_unit( struct brw_context *brw )
+{
+ struct brw_vs_unit_state vs;
+
+ memset(&vs, 0, sizeof(vs));
+
+ /* CACHE_NEW_VS_PROG */
+ vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6;
+ vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+ vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+ vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
+ vs.thread3.dispatch_grf_start_reg = 1;
+
+
+ /* BRW_NEW_URB_FENCE */
+ vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries;
+ vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+ vs.thread4.max_threads = MIN2(
+ MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1),
+ 15);
+
+
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ vs.thread4.max_threads = 0;
+
+ /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
+ if (0 /*brw->attribs.Clip->ClipPlanesEnabled*/) {
+ /* Note that we read in the userclip planes as well, hence
+ * clip_start:
+ */
+ vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+ }
+ else {
+ vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
+ }
+
+ vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ vs.thread3.urb_entry_read_offset = 0;
+
+ /* No samplers for ARB_vp programs:
+ */
+ vs.vs5.sampler_count = 0;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ vs.thread4.stats_enable = 1;
+
+ /* Vertex program always enabled:
+ */
+ vs.vs6.vs_enable = 1;
+
+ brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs );
+}
+
+
+const struct brw_tracked_state brw_vs_unit = {
+ .dirty = {
+ .brw = (BRW_NEW_CLIP |
+ BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .update = upload_vs_unit
+};
diff --git a/src/mesa/pipe/i965simple/brw_vtbl.c b/src/mesa/pipe/i965simple/brw_vtbl.c
new file mode 100644
index 0000000000..6dc3bd838b
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_vtbl.c
@@ -0,0 +1,149 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "brw_vs.h"
+#include <stdarg.h>
+
+#if 0
+/* called from intelDestroyContext()
+ */
+static void brw_destroy_context( struct intel_context *intel )
+{
+ GLcontext *ctx = &intel->ctx;
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ brw_destroy_metaops(brw);
+ brw_destroy_state(brw);
+ brw_draw_destroy( brw );
+
+ brw_ProgramCacheDestroy( ctx );
+ brw_FrameBufferTexDestroy( brw );
+}
+
+/* called from intelDrawBuffer()
+ */
+static void brw_set_draw_region( struct intel_context *intel,
+ struct intel_region *draw_region,
+ struct intel_region *depth_region)
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ intel_region_release(&brw->state.draw_region);
+ intel_region_release(&brw->state.depth_region);
+ intel_region_reference(&brw->state.draw_region, draw_region);
+ intel_region_reference(&brw->state.depth_region, depth_region);
+}
+
+
+/* called from intelFlushBatchLocked
+ */
+static void brw_lost_hardware( struct intel_context *intel )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ /* Note that we effectively lose the context after this.
+ *
+ * Setting this flag provokes a state buffer wrap and also flushes
+ * the hardware caches.
+ */
+ brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+
+ /* Which means there shouldn't be any commands already queued:
+ */
+ assert(intel->batch->ptr == intel->batch->map);
+
+ brw->state.dirty.mesa |= ~0;
+ brw->state.dirty.brw |= ~0;
+ brw->state.dirty.cache |= ~0;
+}
+
+static void brw_note_fence( struct intel_context *intel,
+ unsigned fence )
+{
+ brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
+}
+
+static void brw_note_unlock( struct intel_context *intel )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]);
+ brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]);
+
+ brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK;
+}
+
+
+void brw_do_flush( struct brw_context *brw,
+ unsigned flags )
+{
+ struct brw_mi_flush flush;
+ memset(&flush, 0, sizeof(flush));
+ flush.opcode = CMD_MI_FLUSH;
+ flush.flags = flags;
+ BRW_BATCH_STRUCT(brw, &flush);
+}
+
+
+static void brw_emit_flush( struct intel_context *intel,
+ unsigned unused )
+{
+ brw_do_flush(brw_context(&intel->ctx),
+ BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE);
+}
+
+
+/* called from intelWaitForIdle() and intelFlush()
+ *
+ * For now, just flush everything. Could be smarter later.
+ */
+static unsigned brw_flush_cmd( void )
+{
+ struct brw_mi_flush flush;
+ flush.opcode = CMD_MI_FLUSH;
+ flush.pad = 0;
+ flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE;
+ return *(unsigned *)&flush;
+}
+
+static void brw_invalidate_state( struct intel_context *intel, unsigned new_state )
+{
+ /* nothing */
+}
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_winsys.h b/src/mesa/pipe/i965simple/brw_winsys.h
new file mode 100644
index 0000000000..cc0a210a9f
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_winsys.h
@@ -0,0 +1,192 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * This is the interface that i965simple requires any window system
+ * hosting it to implement. This is the only include file in i965simple
+ * which is public.
+ *
+ */
+
+#ifndef BRW_WINSYS_H
+#define BRW_WINSYS_H
+
+
+#include "pipe/p_defines.h"
+
+
+/* Pipe drivers are (meant to be!) independent of both GL and the
+ * window system. The window system provides a buffer manager and a
+ * set of additional hooks for things like command buffer submission,
+ * etc.
+ *
+ * There clearly has to be some agreement between the window system
+ * driver and the hardware driver about the format of command buffers,
+ * etc.
+ */
+
+struct pipe_buffer_handle;
+struct pipe_fence_handle;
+struct pipe_winsys;
+
+/* The pipe driver currently understands the following chipsets:
+ */
+#define PCI_CHIP_I965_G 0x29A2
+#define PCI_CHIP_I965_Q 0x2992
+#define PCI_CHIP_I965_G_1 0x2982
+#define PCI_CHIP_I965_GM 0x2A02
+#define PCI_CHIP_I965_GME 0x2A12
+
+
+/* These are the names of all the state caches managed by the driver.
+ *
+ * When data is uploaded to a buffer with buffer_subdata, we use the
+ * special version of that function below so that information about
+ * what type of data this is can be passed to the winsys backend.
+ * That in turn allows the correct flags to be set in the aub file
+ * dump to allow human-readable file dumps later on.
+ */
+
+enum brw_cache_id {
+ BRW_CC_VP,
+ BRW_CC_UNIT,
+ BRW_WM_PROG,
+ BRW_SAMPLER_DEFAULT_COLOR,
+ BRW_SAMPLER,
+ BRW_WM_UNIT,
+ BRW_SF_PROG,
+ BRW_SF_VP,
+ BRW_SF_UNIT,
+ BRW_VS_UNIT,
+ BRW_VS_PROG,
+ BRW_GS_UNIT,
+ BRW_GS_PROG,
+ BRW_CLIP_VP,
+ BRW_CLIP_UNIT,
+ BRW_CLIP_PROG,
+ BRW_SS_SURFACE,
+ BRW_SS_SURF_BIND,
+
+ BRW_MAX_CACHE
+};
+
+/**
+ * Additional winsys interface for i965simple.
+ *
+ * It is an over-simple batchbuffer mechanism. Will want to improve the
+ * performance of this, perhaps based on the cmdstream stuff. It
+ * would be pretty impossible to implement swz on top of this
+ * interface.
+ *
+ * Will also need additions/changes to implement static/dynamic
+ * indirect state.
+ */
+struct brw_winsys {
+
+ /**
+ * Reserve space on batch buffer.
+ *
+ * Returns a null pointer if there is insufficient space in the batch buffer
+ * to hold the requested number of dwords and relocations.
+ *
+ * The number of dwords should also include the number of relocations.
+ */
+ unsigned *(*batch_start)(struct brw_winsys *sws,
+ unsigned dwords,
+ unsigned relocs);
+
+ void (*batch_dword)(struct brw_winsys *sws,
+ unsigned dword);
+
+ /**
+ * Emit a relocation to a buffer.
+ *
+ * Used not only when the buffer addresses are not pinned, but also to
+ * ensure refered buffers will not be destroyed until the current batch
+ * buffer execution is finished.
+ *
+ * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and
+ * I915_BUFFER_ACCESS_READ macros.
+ */
+ void (*batch_reloc)(struct brw_winsys *sws,
+ struct pipe_buffer_handle *buf,
+ unsigned access_flags,
+ unsigned delta);
+
+
+ /* Not used yet, but really want this:
+ */
+ void (*batch_end)( struct brw_winsys *sws );
+
+ /**
+ * Flush the batch buffer.
+ *
+ * Fence argument must point to NULL or to a previous fence, and the caller
+ * must call fence_reference when done with the fence.
+ */
+ void (*batch_flush)(struct brw_winsys *sws,
+ struct pipe_fence_handle **fence);
+
+
+ /* A version of buffer_subdata that includes information for the
+ * simulator:
+ */
+ void (*buffer_subdata_typed)(struct brw_winsys *sws,
+ struct pipe_buffer_handle *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data,
+ unsigned data_type);
+
+
+};
+
+#define BRW_BUFFER_ACCESS_WRITE 0x1
+#define BRW_BUFFER_ACCESS_READ 0x2
+
+#define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0)
+
+
+struct pipe_context *brw_create(struct pipe_winsys *,
+ struct brw_winsys *,
+ unsigned pci_id);
+
+static inline boolean brw_batchbuffer_data(struct brw_winsys *winsys,
+ const void *data,
+ unsigned bytes)
+{
+ static const unsigned incr = sizeof(unsigned);
+ int i;
+ const unsigned *udata = (const unsigned*)(data);
+ unsigned size = bytes/incr;
+ for (i = 0; i < size; ++i) {
+ winsys->batch_dword(winsys, udata[i]);
+ }
+ return (i == size);
+}
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm.c b/src/mesa/pipe/i965simple/brw_wm.c
new file mode 100644
index 0000000000..65271f22fd
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm.c
@@ -0,0 +1,338 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+#include "brw_state.h"
+
+unsigned brw_wm_nr_args( unsigned opcode )
+{
+ switch (opcode) {
+
+ case WM_PIXELXY:
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_SCS:
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXD:
+ case TGSI_OPCODE_KIL:
+ case TGSI_OPCODE_LIT:
+ case WM_CINTERP:
+ case WM_WPOSXY:
+ return 1;
+
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_XPD:
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_DST:
+ case WM_LINTERP:
+ case WM_DELTAXY:
+ case WM_PIXELW:
+ return 2;
+
+ case WM_FB_WRITE:
+ case WM_PINTERP:
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_CMP:
+ case TGSI_OPCODE_LRP:
+ return 3;
+
+ default:
+ return 0;
+ }
+}
+
+
+unsigned brw_wm_is_scalar_result( unsigned opcode )
+{
+ switch (opcode) {
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_DST:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+
+static void do_wm_prog( struct brw_context *brw,
+ struct brw_fragment_program *fp,
+ struct brw_wm_prog_key *key)
+{
+ struct brw_wm_compile *c;
+ const unsigned *program;
+ unsigned program_size;
+
+ c = brw->wm.compile_data;
+ if (c == NULL) {
+ brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
+ c = brw->wm.compile_data;
+ } else {
+ memset(c, 0, sizeof(*brw->wm.compile_data));
+ }
+ memcpy(&c->key, key, sizeof(*key));
+
+ c->fp = fp;
+ fprintf(stderr, "XXXXXXXX FP\n");
+
+#if 0
+ c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
+
+ if (brw_wm_is_glsl(&c->fp->program)) {
+ brw_wm_glsl_emit(c);
+ } else
+ {
+ /* Augment fragment program. Add instructions for pre- and
+ * post-fragment-program tasks such as interpolation and fogging.
+ */
+ brw_wm_pass_fp(c);
+
+ /* Translate to intermediate representation. Build register usage
+ * chains.
+ */
+ brw_wm_pass0(c);
+
+ /* Dead code removal.
+ */
+ brw_wm_pass1(c);
+
+ /* Register allocation.
+ */
+ c->grf_limit = BRW_WM_MAX_GRF/2;
+
+ /* This is where we start emitting gen4 code:
+ */
+ brw_init_compile(&c->func);
+
+ brw_wm_pass2(c);
+
+ c->prog_data.total_grf = c->max_wm_grf;
+ if (c->last_scratch) {
+ c->prog_data.total_scratch =
+ c->last_scratch + 0x40;
+ } else {
+ c->prog_data.total_scratch = 0;
+ }
+
+ /* Emit GEN4 code.
+ */
+ brw_wm_emit(c);
+ }
+ /* get the program
+ */
+ program = brw_get_program(&c->func, &program_size);
+
+ /*
+ */
+ brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG],
+ &c->key,
+ sizeof(c->key),
+ program,
+ program_size,
+ &c->prog_data,
+ &brw->wm.prog_data );
+#endif
+}
+
+
+
+static void brw_wm_populate_key( struct brw_context *brw,
+ struct brw_wm_prog_key *key )
+{
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *)brw->attribs.FragmentProgram;
+ unsigned lookup = 0;
+ unsigned line_aa;
+ unsigned i;
+
+ memset(key, 0, sizeof(*key));
+
+ /* Build the index for table lookup
+ */
+ /* _NEW_COLOR */
+ if (fp->UsesKill ||
+ brw->attribs.AlphaTest->enabled)
+ lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fp->ComputesDepth)
+ lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ /* _NEW_DEPTH */
+ if (brw->attribs.DepthStencil->depth.enabled)
+ lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+ if (brw->attribs.DepthStencil->depth.enabled &&
+ brw->attribs.DepthStencil->depth.writemask) /* ?? */
+ lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+ /* _NEW_STENCIL */
+ if (brw->attribs.DepthStencil->stencil.front_enabled) {
+ lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+ if (brw->attribs.DepthStencil->stencil.write_mask[0] ||
+ (brw->attribs.DepthStencil->stencil.back_enabled &&
+ brw->attribs.DepthStencil->stencil.write_mask[1]))
+ lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+ }
+
+ /* XXX: when should this be disabled?
+ */
+ if (1)
+ lookup |= IZ_EARLY_DEPTH_TEST_BIT;
+
+
+ line_aa = AA_NEVER;
+
+ /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
+ if (brw->attribs.Raster->line_smooth) {
+ if (brw->reduced_primitive == PIPE_PRIM_LINES) {
+ line_aa = AA_ALWAYS;
+ }
+ else if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) {
+ if (brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE ||
+ (brw->attribs.Raster->cull_mode == PIPE_WINDING_CW))
+ line_aa = AA_ALWAYS;
+ }
+ else if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if (brw->attribs.Raster->cull_mode == PIPE_WINDING_CCW)
+ line_aa = AA_ALWAYS;
+ }
+ }
+ }
+
+ brw_wm_lookup_iz(line_aa,
+ lookup,
+ key);
+
+
+#if 0
+ /* BRW_NEW_WM_INPUT_DIMENSIONS */
+ key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS);
+#endif
+
+ /* _NEW_LIGHT */
+ key->flat_shade = (brw->attribs.Raster->flatshade);
+
+ /* _NEW_TEXTURE */
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ const struct pipe_sampler_state *unit = brw->attribs.Samplers[i];
+
+ if (unit) {
+
+ if (unit->compare &&
+ unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ key->shadowtex_mask |= 1<<i;
+ }
+#if 0
+ if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA)
+ key->yuvtex_mask |= 1<<i;
+#endif
+ }
+ }
+
+
+ /* Extra info:
+ */
+ key->program_string_id = fp->id;
+
+}
+
+
+static void brw_upload_wm_prog( struct brw_context *brw )
+{
+ struct brw_wm_prog_key key;
+ struct brw_fragment_program *fp = (struct brw_fragment_program *)
+ brw->attribs.FragmentProgram;
+
+ brw_wm_populate_key(brw, &key);
+
+ /* Make an early check for the key.
+ */
+ if (brw_search_cache(&brw->cache[BRW_WM_PROG],
+ &key, sizeof(key),
+ &brw->wm.prog_data,
+ &brw->wm.prog_gs_offset))
+ return;
+
+ do_wm_prog(brw, fp, &key);
+}
+
+
+const struct brw_tracked_state brw_wm_prog = {
+ .dirty = {
+ .brw = (BRW_NEW_FS |
+ BRW_NEW_WM_INPUT_DIMENSIONS |
+ BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = 0
+ },
+ .update = brw_upload_wm_prog
+};
+
diff --git a/src/mesa/pipe/i965simple/brw_wm.h b/src/mesa/pipe/i965simple/brw_wm.h
new file mode 100644
index 0000000000..a394e25da3
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm.h
@@ -0,0 +1,274 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_WM_H
+#define BRW_WM_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+/* A big lookup table is used to figure out which and how many
+ * additional regs will inserted before the main payload in the WM
+ * program execution. These mainly relate to depth and stencil
+ * processing and the early-depth-test optimization.
+ */
+#define IZ_PS_KILL_ALPHATEST_BIT 0x1
+#define IZ_PS_COMPUTES_DEPTH_BIT 0x2
+#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4
+#define IZ_DEPTH_TEST_ENABLE_BIT 0x8
+#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10
+#define IZ_STENCIL_TEST_ENABLE_BIT 0x20
+#define IZ_EARLY_DEPTH_TEST_BIT 0x40
+#define IZ_BIT_MAX 0x80
+
+#define AA_NEVER 0
+#define AA_SOMETIMES 1
+#define AA_ALWAYS 2
+
+struct brw_wm_prog_key {
+ unsigned source_depth_reg:3;
+ unsigned aa_dest_stencil_reg:3;
+ unsigned dest_depth_reg:3;
+ unsigned nr_depth_regs:3;
+ unsigned projtex_mask:8;
+ unsigned shadowtex_mask:8;
+ unsigned computes_depth:1; /* could be derived from program string */
+ unsigned source_depth_to_render_target:1;
+ unsigned flat_shade:1;
+ unsigned runtime_check_aads_emit:1;
+
+ unsigned yuvtex_mask:8;
+ unsigned pad1:24;
+
+ unsigned program_string_id:32;
+};
+
+
+/* A bit of a glossary:
+ *
+ * brw_wm_value: A computed value or program input. Values are
+ * constant, they are created once and are never modified. When a
+ * fragment program register is written or overwritten, new values are
+ * created fresh, preserving the rule that values are constant.
+ *
+ * brw_wm_ref: A reference to a value. Wherever a value used is by an
+ * instruction or as a program output, that is tracked with an
+ * instance of this struct. All references to a value occur after it
+ * is created. After the last reference, a value is dead and can be
+ * discarded.
+ *
+ * brw_wm_grf: Represents a physical hardware register. May be either
+ * empty or hold a value. Register allocation is the process of
+ * assigning values to grf registers. This occurs in pass2 and the
+ * brw_wm_grf struct is not used before that.
+ *
+ * Fragment program registers: These are time-varying constructs that
+ * are hard to reason about and which we translate away in pass0. A
+ * single fragment program register element (eg. temp[0].x) will be
+ * translated to one or more brw_wm_value structs, one for each time
+ * that temp[0].x is written to during the program.
+ */
+
+
+
+/* Used in pass2 to track register allocation.
+ */
+struct brw_wm_grf {
+ struct brw_wm_value *value;
+ unsigned nextuse;
+};
+
+struct brw_wm_value {
+ struct brw_reg hw_reg; /* emitted to this reg, may not always be there */
+ struct brw_wm_ref *lastuse;
+ struct brw_wm_grf *resident;
+ unsigned contributes_to_output:1;
+ unsigned spill_slot:16; /* if non-zero, spill immediately after calculation */
+};
+
+struct brw_wm_ref {
+ struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */
+ struct brw_wm_value *value;
+ struct brw_wm_ref *prevuse;
+ unsigned unspill_reg:7; /* unspill to reg */
+ unsigned emitted:1;
+ unsigned insn:24;
+};
+
+struct brw_wm_constref {
+ const struct brw_wm_ref *ref;
+ float constval;
+};
+
+
+struct brw_wm_instruction {
+ struct brw_wm_value *dst[4];
+ struct brw_wm_ref *src[3][4];
+ unsigned opcode:8;
+ unsigned saturate:1;
+ unsigned writemask:4;
+ unsigned tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */
+ unsigned tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
+};
+
+#define PROGRAM_INTERNAL_PARAM
+#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */
+#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_ATTRIB_MAX + 3)
+#define BRW_WM_MAX_GRF 128 /* hardware limit */
+#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
+#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12)
+#define BRW_WM_MAX_PARAM 256
+#define BRW_WM_MAX_CONST 256
+#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+
+
+
+/* New opcodes to track internal operations required for WM unit.
+ * These are added early so that the registers used can be tracked,
+ * freed and reused like those of other instructions.
+ */
+#define WM_PIXELXY (TGSI_OPCODE_LAST)
+#define WM_DELTAXY (TGSI_OPCODE_LAST + 1)
+#define WM_PIXELW (TGSI_OPCODE_LAST + 2)
+#define WM_LINTERP (TGSI_OPCODE_LAST + 3)
+#define WM_PINTERP (TGSI_OPCODE_LAST + 4)
+#define WM_CINTERP (TGSI_OPCODE_LAST + 5)
+#define WM_WPOSXY (TGSI_OPCODE_LAST + 6)
+#define WM_FB_WRITE (TGSI_OPCODE_LAST + 7)
+#define MAX_WM_OPCODE (TGSI_OPCODE_LAST + 8)
+
+#define PAYLOAD_DEPTH (PIPE_ATTRIB_MAX)
+
+struct brw_wm_compile {
+ struct brw_compile func;
+ struct brw_wm_prog_key key;
+ struct brw_wm_prog_data prog_data;
+
+ struct brw_fragment_program *fp;
+
+ float (*env_param)[4];
+
+ enum {
+ START,
+ PASS2_DONE
+ } state;
+
+ /* Initial pass - translate fp instructions to fp instructions,
+ * simplifying and adding instructions for interpolation and
+ * framebuffer writes.
+ */
+ const struct pipe_shader_state *prog_instructions;
+ unsigned nr_fp_insns;
+ unsigned fp_temp;
+ unsigned fp_interp_emitted;
+ unsigned fp_deriv_emitted;
+
+ struct tgsi_src_register pixel_xy;
+ struct tgsi_src_register delta_xy;
+ struct tgsi_src_register pixel_w;
+
+
+ struct brw_wm_value vreg[BRW_WM_MAX_VREG];
+ unsigned nr_vreg;
+
+ struct brw_wm_value creg[BRW_WM_MAX_PARAM];
+ unsigned nr_creg;
+
+ struct {
+ struct brw_wm_value depth[4]; /* includes r0/r1 */
+ struct brw_wm_value input_interp[PIPE_ATTRIB_MAX];
+ } payload;
+
+
+ const struct brw_wm_ref *pass0_fp_reg[16][256][4];
+
+ struct brw_wm_ref undef_ref;
+ struct brw_wm_value undef_value;
+
+ struct brw_wm_ref refs[BRW_WM_MAX_REF];
+ unsigned nr_refs;
+
+ struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
+ unsigned nr_insns;
+
+ struct brw_wm_constref constref[BRW_WM_MAX_CONST];
+ unsigned nr_constrefs;
+
+ struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
+
+ unsigned grf_limit;
+ unsigned max_wm_grf;
+ unsigned last_scratch;
+
+ struct {
+ boolean inited;
+ struct brw_reg reg;
+ } wm_regs[16][256][4];
+ struct brw_reg stack;
+ struct brw_reg emit_mask_reg;
+ unsigned reg_index;
+ unsigned tmp_index;
+};
+
+
+unsigned brw_wm_nr_args( unsigned opcode );
+unsigned brw_wm_is_scalar_result( unsigned opcode );
+
+void brw_wm_pass_fp( struct brw_wm_compile *c );
+void brw_wm_pass0( struct brw_wm_compile *c );
+void brw_wm_pass1( struct brw_wm_compile *c );
+void brw_wm_pass2( struct brw_wm_compile *c );
+void brw_wm_emit( struct brw_wm_compile *c );
+
+void brw_wm_print_value( struct brw_wm_compile *c,
+ struct brw_wm_value *value );
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+ struct brw_wm_ref *ref );
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst );
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+ const char *stage );
+
+void brw_wm_lookup_iz( unsigned line_aa,
+ unsigned lookup,
+ struct brw_wm_prog_key *key );
+
+#if 0
+boolean brw_wm_is_glsl(struct gl_fragment_program *fp);
+void brw_wm_glsl_emit(struct brw_wm_compile *c);
+#endif
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_debug.c b/src/mesa/pipe/i965simple/brw_wm_debug.c
new file mode 100644
index 0000000000..b31a608901
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_debug.c
@@ -0,0 +1,172 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+#if 0
+void brw_wm_print_value( struct brw_wm_compile *c,
+ struct brw_wm_value *value )
+{
+ assert(value);
+ if (c->state >= PASS2_DONE)
+ brw_print_reg(value->hw_reg);
+ else if( value == &c->undef_value )
+ _mesa_printf("undef");
+ else if( value - c->vreg >= 0 &&
+ value - c->vreg < BRW_WM_MAX_VREG)
+ _mesa_printf("r%d", value - c->vreg);
+ else if (value - c->creg >= 0 &&
+ value - c->creg < BRW_WM_MAX_PARAM)
+ _mesa_printf("c%d", value - c->creg);
+ else if (value - c->payload.input_interp >= 0 &&
+ value - c->payload.input_interp < FRAG_ATTRIB_MAX)
+ _mesa_printf("i%d", value - c->payload.input_interp);
+ else if (value - c->payload.depth >= 0 &&
+ value - c->payload.depth < FRAG_ATTRIB_MAX)
+ _mesa_printf("d%d", value - c->payload.depth);
+ else
+ _mesa_printf("?");
+}
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+ struct brw_wm_ref *ref )
+{
+ struct brw_reg hw_reg = ref->hw_reg;
+
+ if (ref->unspill_reg)
+ _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot);
+
+ if (c->state >= PASS2_DONE)
+ brw_print_reg(ref->hw_reg);
+ else {
+ _mesa_printf("%s", hw_reg.negate ? "-" : "");
+ _mesa_printf("%s", hw_reg.abs ? "abs/" : "");
+ brw_wm_print_value(c, ref->value);
+ if ((hw_reg.nr&1) || hw_reg.subnr) {
+ _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
+ }
+ }
+}
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst )
+{
+ unsigned i, arg;
+ unsigned nr_args = brw_wm_nr_args(inst->opcode);
+
+ _mesa_printf("[");
+ for (i = 0; i < 4; i++) {
+ if (inst->dst[i]) {
+ brw_wm_print_value(c, inst->dst[i]);
+ if (inst->dst[i]->spill_slot)
+ _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
+ }
+ else
+ _mesa_printf("#");
+ if (i < 3)
+ _mesa_printf(",");
+ }
+ _mesa_printf("]");
+
+ if (inst->writemask != WRITEMASK_XYZW)
+ _mesa_printf(".%s%s%s%s",
+ GET_BIT(inst->writemask, 0) ? "x" : "",
+ GET_BIT(inst->writemask, 1) ? "y" : "",
+ GET_BIT(inst->writemask, 2) ? "z" : "",
+ GET_BIT(inst->writemask, 3) ? "w" : "");
+
+ switch (inst->opcode) {
+ case WM_PIXELXY:
+ _mesa_printf(" = PIXELXY");
+ break;
+ case WM_DELTAXY:
+ _mesa_printf(" = DELTAXY");
+ break;
+ case WM_PIXELW:
+ _mesa_printf(" = PIXELW");
+ break;
+ case WM_WPOSXY:
+ _mesa_printf(" = WPOSXY");
+ break;
+ case WM_PINTERP:
+ _mesa_printf(" = PINTERP");
+ break;
+ case WM_LINTERP:
+ _mesa_printf(" = LINTERP");
+ break;
+ case WM_CINTERP:
+ _mesa_printf(" = CINTERP");
+ break;
+ case WM_FB_WRITE:
+ _mesa_printf(" = FB_WRITE");
+ break;
+ default:
+ _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode));
+ break;
+ }
+
+ if (inst->saturate)
+ _mesa_printf("_SAT");
+
+ for (arg = 0; arg < nr_args; arg++) {
+
+ _mesa_printf(" [");
+
+ for (i = 0; i < 4; i++) {
+ if (inst->src[arg][i]) {
+ brw_wm_print_ref(c, inst->src[arg][i]);
+ }
+ else
+ _mesa_printf("%%");
+
+ if (i < 3)
+ _mesa_printf(",");
+ else
+ _mesa_printf("]");
+ }
+ }
+ _mesa_printf("\n");
+}
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+ const char *stage )
+{
+ unsigned insn;
+
+ _mesa_printf("\n\n\n%s:\n", stage);
+ for (insn = 0; insn < c->nr_insns; insn++)
+ brw_wm_print_insn(c, &c->instruction[insn]);
+ _mesa_printf("\n\n\n");
+}
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_emit.c b/src/mesa/pipe/i965simple/brw_wm_emit.c
new file mode 100644
index 0000000000..75bc4cd419
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_emit.c
@@ -0,0 +1,1289 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+#define SATURATE (1<<5)
+
+/* Not quite sure how correct this is - need to understand horiz
+ * vs. vertical strides a little better.
+ */
+static __inline struct brw_reg sechalf( struct brw_reg reg )
+{
+ if (reg.vstride)
+ reg.nr++;
+ return reg;
+}
+
+/* Payload R0:
+ *
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
+ * corresponding to each of the 16 execution channels.
+ * R0.1..8 -- ?
+ * R1.0 -- triangle vertex 0.X
+ * R1.1 -- triangle vertex 0.Y
+ * R1.2 -- tile 0 x,y coords (2 packed uwords)
+ * R1.3 -- tile 1 x,y coords (2 packed uwords)
+ * R1.4 -- tile 2 x,y coords (2 packed uwords)
+ * R1.5 -- tile 3 x,y coords (2 packed uwords)
+ * R1.6 -- ?
+ * R1.7 -- ?
+ * R1.8 -- ?
+ */
+
+
+static void emit_pixel_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ /* Calculate pixel centers by adding 1 or 0 to each of the
+ * micro-tile coordinates passed in r1.
+ */
+ if (mask & TGSI_WRITEMASK_X) {
+ brw_ADD(p,
+ vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 4), 2, 4, 0),
+ brw_imm_v(0x10101010));
+ }
+
+ if (mask & TGSI_WRITEMASK_Y) {
+ brw_ADD(p,
+ vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw,5), 2, 4, 0),
+ brw_imm_v(0x11001100));
+ }
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+}
+
+
+
+static void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ if (mask & TGSI_WRITEMASK_X) {
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW),
+ negate(r1));
+ }
+
+ if (mask & TGSI_WRITEMASK_Y) {
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
+
+ }
+}
+
+static void emit_wpos_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0)
+{
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ if (mask & TGSI_WRITEMASK_X) {
+ brw_MOV(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW));
+ }
+
+ if (mask & TGSI_WRITEMASK_Y) {
+ /* TODO -- window_height - Y */
+ brw_MOV(p,
+ dst[1],
+ negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
+
+ }
+}
+
+
+static void emit_pixel_w( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
+{
+ /* Don't need this if all you are doing is interpolating color, for
+ * instance.
+ */
+ if (mask & TGSI_WRITEMASK_W) {
+ struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
+
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+
+ /* Calc w */
+ brw_math_16( p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+
+
+
+static void emit_linterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas )
+{
+ struct brw_reg interp[4];
+ unsigned nr = arg0[0].nr;
+ unsigned i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
+ }
+}
+
+
+static void emit_pinterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w)
+{
+ struct brw_reg interp[4];
+ unsigned nr = arg0[0].nr;
+ unsigned i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ brw_MUL(p, dst[i], dst[i], w[3]);
+ }
+ }
+}
+
+static void emit_cinterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0 )
+{
+ struct brw_reg interp[4];
+ unsigned nr = arg0[0].nr;
+ unsigned i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
+ }
+ }
+}
+
+
+
+
+
+static void emit_alu1( struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0 )
+{
+ unsigned i;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ func(p, dst[i], arg0[i]);
+ }
+ }
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+static void emit_alu2( struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ unsigned i;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ func(p, dst[i], arg0[i], arg1[i]);
+ }
+ }
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_mad( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2 )
+{
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MUL(p, dst[i], arg0[i], arg1[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_ADD(p, dst[i], dst[i], arg2[i]);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+
+static void emit_lrp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2 )
+{
+ unsigned i;
+
+ /* Uses dst as a temporary:
+ */
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ /* Can I use the LINE instruction for this?
+ */
+ brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[i], arg0[i], arg1[i]);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+static void emit_sop( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ unsigned cond,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], brw_imm_f(0));
+ brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
+ brw_MOV(p, dst[i], brw_imm_f(1.0));
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+static void emit_slt( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
+}
+
+static void emit_sle( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
+}
+
+static void emit_sgt( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
+}
+
+static void emit_sge( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
+}
+
+static void emit_seq( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
+}
+
+static void emit_sne( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
+}
+
+static void emit_cmp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2 )
+{
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg2[i]);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg1[i]);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+static void emit_max( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg0[i]);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg1[i]);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+static void emit_min( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg1[i]);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg0[i]);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+
+static void emit_dp3( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ assert((mask & TGSI_WRITEMASK_XYZW) == TGSI_WRITEMASK_X);
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[0], arg0[2], arg1[2]);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_dp4( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ assert((mask & TGSI_WRITEMASK_XYZW) == TGSI_WRITEMASK_X);
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+ brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[0], arg0[3], arg1[3]);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_dph( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ assert((mask & TGSI_WRITEMASK_XYZW) == TGSI_WRITEMASK_X);
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+ brw_MAC(p, dst[0], arg0[2], arg1[2]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_ADD(p, dst[0], dst[0], arg1[3]);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_xpd( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ unsigned i;
+
+ assert(!(mask & TGSI_WRITEMASK_W) == TGSI_WRITEMASK_X);
+
+ for (i = 0 ; i < 3; i++) {
+ if (mask & (1<<i)) {
+ unsigned i2 = (i+2)%3;
+ unsigned i1 = (i+1)%3;
+
+ brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+
+static void emit_math1( struct brw_compile *p,
+ unsigned function,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0 )
+{
+ //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
+ // function == BRW_MATH_FUNCTION_SINCOS);
+
+ brw_MOV(p, brw_message_reg(2), arg0[0]);
+
+ /* Send two messages to perform all 16 operations:
+ */
+ brw_math_16(p,
+ dst[0],
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+}
+
+
+static void emit_math2( struct brw_compile *p,
+ unsigned function,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
+{
+ assert((mask & TGSI_WRITEMASK_XYZW) == TGSI_WRITEMASK_X);
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(2), arg0[0]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(3), arg1[0]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+
+
+ /* Send two messages to perform all 16 operations:
+ */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p,
+ dst[0],
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[0],1),
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 4,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_pop_insn_state(p);
+}
+
+
+
+static void emit_tex( struct brw_wm_compile *c,
+ const struct brw_wm_instruction *inst,
+ struct brw_reg *dst,
+ unsigned dst_flags,
+ struct brw_reg *arg )
+{
+ struct brw_compile *p = &c->func;
+ unsigned msgLength, responseLength;
+ boolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
+ unsigned i, nr;
+ unsigned emit;
+
+ /* How many input regs are there?
+ */
+ switch (inst->tex_idx) {
+ case TGSI_TEXTURE_1D:
+ emit = TGSI_WRITEMASK_X;
+ nr = 1;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ emit = TGSI_WRITEMASK_XY;
+ nr = 2;
+ break;
+ default:
+ emit = TGSI_WRITEMASK_XYZ;
+ nr = 3;
+ break;
+ }
+
+ if (shadow) {
+ nr = 4;
+ emit |= TGSI_WRITEMASK_W;
+ }
+
+ msgLength = 1;
+
+ for (i = 0; i < nr; i++) {
+ static const unsigned swz[4] = {0,1,2,2};
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
+ else
+ brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
+ msgLength += 2;
+ }
+
+ responseLength = 8; /* always */
+
+ brw_SAMPLE(p,
+ retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+ inst->tex_unit + 1, /* surface */
+ inst->tex_unit, /* sampler */
+ inst->writemask,
+ (shadow ?
+ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
+ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
+ responseLength,
+ msgLength,
+ 0);
+
+ if (shadow)
+ brw_MOV(p, dst[3], brw_imm_f(1.0));
+}
+
+
+static void emit_txb( struct brw_wm_compile *c,
+ const struct brw_wm_instruction *inst,
+ struct brw_reg *dst,
+ unsigned dst_flags,
+ struct brw_reg *arg )
+{
+ struct brw_compile *p = &c->func;
+ unsigned msgLength;
+
+ /* Shadow ignored for txb.
+ */
+ switch (inst->tex_idx) {
+ case TGSI_TEXTURE_1D:
+ brw_MOV(p, brw_message_reg(2), arg[0]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ brw_MOV(p, brw_message_reg(2), arg[0]);
+ brw_MOV(p, brw_message_reg(4), arg[1]);
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ break;
+ default:
+ brw_MOV(p, brw_message_reg(2), arg[0]);
+ brw_MOV(p, brw_message_reg(4), arg[1]);
+ brw_MOV(p, brw_message_reg(6), arg[2]);
+ break;
+ }
+
+ brw_MOV(p, brw_message_reg(8), arg[3]);
+ msgLength = 9;
+
+
+ brw_SAMPLE(p,
+ retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+ inst->tex_unit + 1, /* surface */
+ inst->tex_unit, /* sampler */
+ inst->writemask,
+ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+ 8, /* responseLength */
+ msgLength,
+ 0);
+
+}
+
+
+static void emit_lit( struct brw_compile *p,
+ const struct brw_reg *dst,
+ unsigned mask,
+ const struct brw_reg *arg0 )
+{
+ assert((mask & TGSI_WRITEMASK_XW) == 0);
+
+ if (mask & TGSI_WRITEMASK_Y) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[1], arg0[0]);
+ brw_set_saturate(p, 0);
+ }
+
+ if (mask & TGSI_WRITEMASK_Z) {
+ emit_math2(p, BRW_MATH_FUNCTION_POW,
+ &dst[2],
+ TGSI_WRITEMASK_X | (mask & SATURATE),
+ &arg0[1],
+ &arg0[3]);
+ }
+
+ /* Ordinarily you'd use an iff statement to skip or shortcircuit
+ * some of the POW calculations above, but 16-wide iff statements
+ * seem to lock c1 hardware, so this is a nasty workaround:
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
+ {
+ if (mask & TGSI_WRITEMASK_Y)
+ brw_MOV(p, dst[1], brw_imm_f(0));
+
+ if (mask & TGSI_WRITEMASK_Z)
+ brw_MOV(p, dst[2], brw_imm_f(0));
+ }
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+/* Kill pixel - set execution mask to zero for those pixels which
+ * fail.
+ */
+static void emit_kil( struct brw_wm_compile *c,
+ struct brw_reg *arg0)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ unsigned i;
+
+
+ /* XXX - usually won't need 4 compares!
+ */
+ for (i = 0; i < 4; i++) {
+ brw_push_insn_state(p);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
+ brw_set_predicate_control_flag_value(p, 0xff);
+ brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+ brw_pop_insn_state(p);
+ }
+}
+
+static void fire_fb_write( struct brw_wm_compile *c,
+ unsigned base_reg,
+ unsigned nr )
+{
+ struct brw_compile *p = &c->func;
+
+ /* Pass through control information:
+ */
+/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(base_reg + 1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+ }
+
+ /* Send framebuffer write message: */
+/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
+ brw_fb_WRITE(p,
+ retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ base_reg,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ 0, /* render surface always 0 */
+ nr,
+ 0,
+ 1);
+}
+
+static void emit_aa( struct brw_wm_compile *c,
+ struct brw_reg *arg1,
+ unsigned reg )
+{
+ struct brw_compile *p = &c->func;
+ unsigned comp = c->key.aa_dest_stencil_reg / 2;
+ unsigned off = c->key.aa_dest_stencil_reg % 2;
+ struct brw_reg aa = offset(arg1[comp], off);
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
+ brw_MOV(p, brw_message_reg(reg), aa);
+ brw_pop_insn_state(p);
+}
+
+
+/* Post-fragment-program processing. Send the results to the
+ * framebuffer.
+ */
+static void emit_fb_write( struct brw_wm_compile *c,
+ struct brw_reg *arg0,
+ struct brw_reg *arg1,
+ struct brw_reg *arg2)
+{
+ struct brw_compile *p = &c->func;
+ unsigned nr = 2;
+ unsigned channel;
+
+ /* Reserve a space for AA - may not be needed:
+ */
+ if (c->key.aa_dest_stencil_reg)
+ nr += 1;
+
+ /* I don't really understand how this achieves the color interleave
+ * (ie RGBARGBA) in the result: [Do the saturation here]
+ */
+ {
+ brw_push_insn_state(p);
+
+ for (channel = 0; channel < 4; channel++) {
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(nr + channel),
+ arg0[channel]);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p,
+ brw_message_reg(nr + channel + 4),
+ sechalf(arg0[channel]));
+ }
+
+ /* skip over the regs populated above:
+ */
+ nr += 8;
+
+ brw_pop_insn_state(p);
+ }
+
+ if (c->key.source_depth_to_render_target)
+ {
+ if (c->key.computes_depth)
+ brw_MOV(p, brw_message_reg(nr), arg2[2]);
+ else
+ brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
+
+ nr += 2;
+ }
+
+ if (c->key.dest_depth_reg)
+ {
+ unsigned comp = c->key.dest_depth_reg / 2;
+ unsigned off = c->key.dest_depth_reg % 2;
+
+ if (off != 0) {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+ /* 2nd half? */
+ brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
+ brw_pop_insn_state(p);
+ }
+ else {
+ brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+ }
+ nr += 2;
+ }
+
+
+ if (!c->key.runtime_check_aads_emit) {
+ if (c->key.aa_dest_stencil_reg)
+ emit_aa(c, arg1, 2);
+
+ fire_fb_write(c, 0, nr);
+ }
+ else {
+ struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+ struct brw_reg ip = brw_ip_reg();
+ struct brw_instruction *jmp;
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p,
+ v1_null_ud,
+ get_element_ud(brw_vec8_grf(1,0), 6),
+ brw_imm_ud(1<<26));
+
+ jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ {
+ emit_aa(c, arg1, 2);
+ fire_fb_write(c, 0, nr);
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* ELSE: Shuffle up one register to fill in the hole left for AA:
+ */
+ fire_fb_write(c, 1, nr-1);
+ }
+}
+
+
+
+
+/* Post-fragment-program processing. Send the results to the
+ * framebuffer.
+ */
+static void emit_spill( struct brw_wm_compile *c,
+ struct brw_reg reg,
+ unsigned slot )
+{
+ struct brw_compile *p = &c->func;
+
+ /*
+ mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
+ */
+ brw_MOV(p, brw_message_reg(2), reg);
+
+ /*
+ mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
+ send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
+ */
+ brw_dp_WRITE_16(p,
+ retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
+ 1,
+ slot);
+}
+
+static void emit_unspill( struct brw_wm_compile *c,
+ struct brw_reg reg,
+ unsigned slot )
+{
+ struct brw_compile *p = &c->func;
+
+ /* Slot 0 is the undef value.
+ */
+ if (slot == 0) {
+ brw_MOV(p, reg, brw_imm_f(0));
+ return;
+ }
+
+ /*
+ mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
+ send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
+ */
+
+ brw_dp_READ_16(p,
+ retype(vec16(reg), BRW_REGISTER_TYPE_UW),
+ 1,
+ slot);
+}
+
+
+
+/**
+ * Retrieve upto 4 GEN4 register pairs for the given wm reg:
+ */
+static void get_argument_regs( struct brw_wm_compile *c,
+ struct brw_wm_ref *arg[],
+ struct brw_reg *regs )
+{
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ if (arg[i]) {
+
+ if (arg[i]->unspill_reg)
+ emit_unspill(c,
+ brw_vec8_grf(arg[i]->unspill_reg, 0),
+ arg[i]->value->spill_slot);
+
+ regs[i] = arg[i]->hw_reg;
+ }
+ else {
+ regs[i] = brw_null_reg();
+ }
+ }
+}
+
+static void spill_values( struct brw_wm_compile *c,
+ struct brw_wm_value *values,
+ unsigned nr )
+{
+ unsigned i;
+
+ for (i = 0; i < nr; i++)
+ if (values[i].spill_slot)
+ emit_spill(c, values[i].hw_reg, values[i].spill_slot);
+}
+
+
+
+/* Emit the fragment program instructions here.
+ */
+void brw_wm_emit( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned insn;
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+ /* Check if any of the payload regs need to be spilled:
+ */
+ spill_values(c, c->payload.depth, 4);
+ spill_values(c, c->creg, c->nr_creg);
+ spill_values(c, c->payload.input_interp, PIPE_ATTRIB_MAX);
+
+
+ for (insn = 0; insn < c->nr_insns; insn++) {
+
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+ struct brw_reg args[3][4], dst[4];
+ unsigned i, dst_flags;
+
+ /* Get argument regs:
+ */
+ for (i = 0; i < 3; i++)
+ get_argument_regs(c, inst->src[i], args[i]);
+
+ /* Get dest regs:
+ */
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i])
+ dst[i] = inst->dst[i]->hw_reg;
+ else
+ dst[i] = brw_null_reg();
+
+ /* Flags
+ */
+ dst_flags = inst->writemask;
+ if (inst->saturate)
+ dst_flags |= SATURATE;
+
+ switch (inst->opcode) {
+ /* Generated instructions for calculating triangle interpolants:
+ */
+ case WM_PIXELXY:
+ emit_pixel_xy(p, dst, dst_flags, args[0]);
+ break;
+
+ case WM_DELTAXY:
+ emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case WM_WPOSXY:
+ emit_wpos_xy(p, dst, dst_flags, args[0]);
+ break;
+
+ case WM_PIXELW:
+ emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case WM_LINTERP:
+ emit_linterp(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case WM_PINTERP:
+ emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case WM_CINTERP:
+ emit_cinterp(p, dst, dst_flags, args[0]);
+ break;
+
+ case WM_FB_WRITE:
+ emit_fb_write(c, args[0], args[1], args[2]);
+ break;
+
+ /* Straightforward arithmetic:
+ */
+ case TGSI_OPCODE_ADD:
+ emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_FRC:
+ emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_FLR:
+ emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_DP3: /* */
+ emit_dp3(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_DP4:
+ emit_dp4(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_DPH:
+ emit_dph(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_LRP: /* */
+ emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case TGSI_OPCODE_MAD:
+ emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case TGSI_OPCODE_MOV:
+ emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_MUL:
+ emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_XPD:
+ emit_xpd(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ /* Higher math functions:
+ */
+ case TGSI_OPCODE_RCP:
+ emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_SIN:
+ emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_COS:
+ emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_EX2:
+ emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_LG2:
+ emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_SCS:
+ /* There is an scs math function, but it would need some
+ * fixup for 16-element execution.
+ */
+ if (dst_flags & TGSI_WRITEMASK_X)
+ emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|TGSI_WRITEMASK_X,
+ args[0]);
+ if (dst_flags & TGSI_WRITEMASK_Y)
+ emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|TGSI_WRITEMASK_X,
+ args[0]);
+ break;
+
+ case TGSI_OPCODE_POW:
+ emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+ break;
+
+ /* Comparisons:
+ */
+ case TGSI_OPCODE_CMP:
+ emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case TGSI_OPCODE_MAX:
+ emit_max(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_MIN:
+ emit_min(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_SLT:
+ emit_slt(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_SLE:
+ emit_sle(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGT:
+ emit_sgt(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGE:
+ emit_sge(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SEQ:
+ emit_seq(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SNE:
+ emit_sne(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_LIT:
+ emit_lit(p, dst, dst_flags, args[0]);
+ break;
+
+ /* Texturing operations:
+ */
+ case TGSI_OPCODE_TEX:
+ emit_tex(c, inst, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_TXB:
+ emit_txb(c, inst, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_KIL:
+ emit_kil(c, args[0]);
+ break;
+
+ default:
+ printf("unsupport opcode %d in fragment program\n",
+ inst->opcode);
+ }
+
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i] && inst->dst[i]->spill_slot)
+ emit_spill(c,
+ inst->dst[i]->hw_reg,
+ inst->dst[i]->spill_slot);
+ }
+}
diff --git a/src/mesa/pipe/i965simple/brw_wm_fp.c b/src/mesa/pipe/i965simple/brw_wm_fp.c
new file mode 100644
index 0000000000..20e90bc612
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_fp.c
@@ -0,0 +1,1007 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "brw_util.h"
+
+
+#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+
+static const char *wm_opcode_strings[] = {
+ "PIXELXY",
+ "DELTAXY",
+ "PIXELW",
+ "LINTERP",
+ "PINTERP",
+ "CINTERP",
+ "WPOSXY",
+ "FB_WRITE"
+};
+
+#if 0
+static const char *wm_file_strings[] = {
+ "PAYLOAD"
+};
+#endif
+
+
+/***********************************************************************
+ * Source regs
+ */
+#if 0
+static struct prog_src_register src_reg(unsigned file, unsigned idx)
+{
+ struct prog_src_register reg;
+ reg.File = file;
+ reg.Index = idx;
+ reg.Swizzle = SWIZZLE_NOOP;
+ reg.RelAddr = 0;
+ reg.NegateBase = 0;
+ reg.Abs = 0;
+ reg.NegateAbs = 0;
+ return reg;
+}
+
+static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
+{
+ return src_reg(dst.File, dst.Index);
+}
+
+static struct prog_src_register src_undef( void )
+{
+ return src_reg(PROGRAM_UNDEFINED, 0);
+}
+
+static boolean src_is_undef(struct prog_src_register src)
+{
+ return src.File == PROGRAM_UNDEFINED;
+}
+
+static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
+{
+ reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
+ return reg;
+}
+
+static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
+{
+ return src_swizzle(reg, x, x, x, x);
+}
+
+
+/***********************************************************************
+ * Dest regs
+ */
+
+static struct prog_dst_register dst_reg(unsigned file, unsigned idx)
+{
+ struct prog_dst_register reg;
+ reg.File = file;
+ reg.Index = idx;
+ reg.WriteMask = WRITEMASK_XYZW;
+ reg.CondMask = 0;
+ reg.CondSwizzle = 0;
+ reg.pad = 0;
+ reg.CondSrc = 0;
+ return reg;
+}
+
+static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
+{
+ reg.WriteMask &= mask;
+ return reg;
+}
+
+static struct prog_dst_register dst_undef( void )
+{
+ return dst_reg(PROGRAM_UNDEFINED, 0);
+}
+
+
+
+static struct prog_dst_register get_temp( struct brw_wm_compile *c )
+{
+ int bit = ffs( ~c->fp_temp );
+
+ if (!bit) {
+ _mesa_printf("%s: out of temporaries\n", __FILE__);
+ exit(1);
+ }
+
+ c->fp_temp |= 1<<(bit-1);
+ return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
+}
+
+
+static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
+{
+ c->fp_temp &= ~1<<(temp.Index + 1 - FIRST_INTERNAL_TEMP);
+}
+
+
+/***********************************************************************
+ * Instructions
+ */
+
+static const struct tgsi_token *get_fp_inst(struct brw_wm_compile *c)
+{
+ return &c->prog_instructions->tokens[c->nr_fp_insns++];
+}
+
+static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
+ const struct prog_instruction *inst0)
+{
+ struct prog_instruction *inst = get_fp_inst(c);
+ *inst = *inst0;
+ inst->Data = (void *)inst0;
+ return inst;
+}
+
+static struct prog_instruction * emit_op(struct brw_wm_compile *c,
+ unsigned op,
+ struct prog_dst_register dest,
+ unsigned saturate,
+ unsigned tex_src_unit,
+ unsigned tex_src_target,
+ struct prog_src_register src0,
+ struct prog_src_register src1,
+ struct prog_src_register src2 )
+{
+ struct prog_instruction *inst = get_fp_inst(c);
+
+ memset(inst, 0, sizeof(*inst));
+
+ inst->Opcode = op;
+ inst->DstReg = dest;
+ inst->SaturateMode = saturate;
+ inst->TexSrcUnit = tex_src_unit;
+ inst->TexSrcTarget = tex_src_target;
+ inst->SrcReg[0] = src0;
+ inst->SrcReg[1] = src1;
+ inst->SrcReg[2] = src2;
+ return inst;
+}
+
+
+
+
+/***********************************************************************
+ * Special instructions for interpolation and other tasks
+ */
+
+static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
+{
+ if (src_is_undef(c->pixel_xy)) {
+ struct prog_dst_register pixel_xy = get_temp(c);
+ struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+
+
+ /* Emit the out calculations, and hold onto the results. Use
+ * two instructions as a temporary is required.
+ */
+ /* pixel_xy.xy = PIXELXY payload[0];
+ */
+ emit_op(c,
+ WM_PIXELXY,
+ dst_mask(pixel_xy, WRITEMASK_XY),
+ 0, 0, 0,
+ payload_r0_depth,
+ src_undef(),
+ src_undef());
+
+ c->pixel_xy = src_reg_from_dst(pixel_xy);
+ }
+
+ return c->pixel_xy;
+}
+
+static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
+{
+ if (src_is_undef(c->delta_xy)) {
+ struct prog_dst_register delta_xy = get_temp(c);
+ struct prog_src_register pixel_xy = get_pixel_xy(c);
+ struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+
+ /* deltas.xy = DELTAXY pixel_xy, payload[0]
+ */
+ emit_op(c,
+ WM_DELTAXY,
+ dst_mask(delta_xy, WRITEMASK_XY),
+ 0, 0, 0,
+ pixel_xy,
+ payload_r0_depth,
+ src_undef());
+
+ c->delta_xy = src_reg_from_dst(delta_xy);
+ }
+
+ return c->delta_xy;
+}
+
+static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
+{
+ if (src_is_undef(c->pixel_w)) {
+ struct prog_dst_register pixel_w = get_temp(c);
+ struct prog_src_register deltas = get_delta_xy(c);
+ struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
+
+
+ /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
+ */
+ emit_op(c,
+ WM_PIXELW,
+ dst_mask(pixel_w, WRITEMASK_W),
+ 0, 0, 0,
+ interp_wpos,
+ deltas,
+ src_undef());
+
+
+ c->pixel_w = src_reg_from_dst(pixel_w);
+ }
+
+ return c->pixel_w;
+}
+
+static void emit_interp( struct brw_wm_compile *c,
+ unsigned idx )
+{
+ struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
+ struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+ struct prog_src_register deltas = get_delta_xy(c);
+ struct prog_src_register arg2;
+ unsigned opcode;
+
+ /* Need to use PINTERP on attributes which have been
+ * multiplied by 1/W in the SF program, and LINTERP on those
+ * which have not:
+ */
+ switch (idx) {
+ case FRAG_ATTRIB_WPOS:
+ opcode = WM_LINTERP;
+ arg2 = src_undef();
+
+ /* Have to treat wpos.xy specially:
+ */
+ emit_op(c,
+ WM_WPOSXY,
+ dst_mask(dst, WRITEMASK_XY),
+ 0, 0, 0,
+ get_pixel_xy(c),
+ src_undef(),
+ src_undef());
+
+ dst = dst_mask(dst, WRITEMASK_ZW);
+
+ /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+ */
+ emit_op(c,
+ WM_LINTERP,
+ dst,
+ 0, 0, 0,
+ interp,
+ deltas,
+ arg2);
+ break;
+ case FRAG_ATTRIB_COL0:
+ case FRAG_ATTRIB_COL1:
+ if (c->key.flat_shade) {
+ emit_op(c,
+ WM_CINTERP,
+ dst,
+ 0, 0, 0,
+ interp,
+ src_undef(),
+ src_undef());
+ }
+ else {
+ emit_op(c,
+ WM_LINTERP,
+ dst,
+ 0, 0, 0,
+ interp,
+ deltas,
+ src_undef());
+ }
+ break;
+ default:
+ emit_op(c,
+ WM_PINTERP,
+ dst,
+ 0, 0, 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
+ break;
+ }
+
+ c->fp_interp_emitted |= 1<<idx;
+}
+
+static void emit_ddx( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ unsigned idx = inst->SrcReg[0].Index;
+ struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+
+ c->fp_deriv_emitted |= 1<<idx;
+ emit_op(c,
+ OPCODE_DDX,
+ inst->DstReg,
+ 0, 0, 0,
+ interp,
+ get_pixel_w(c),
+ src_undef());
+}
+
+static void emit_ddy( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ unsigned idx = inst->SrcReg[0].Index;
+ struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+
+ c->fp_deriv_emitted |= 1<<idx;
+ emit_op(c,
+ OPCODE_DDY,
+ inst->DstReg,
+ 0, 0, 0,
+ interp,
+ get_pixel_w(c),
+ src_undef());
+}
+
+/***********************************************************************
+ * Hacks to extend the program parameter and constant lists.
+ */
+
+/* Add the fog parameters to the parameter list of the original
+ * program, rather than creating a new list. Doesn't really do any
+ * harm and it's not as if the parameter handling isn't a big hack
+ * anyway.
+ */
+static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
+ int s0,
+ int s1,
+ int s2,
+ int s3,
+ int s4)
+{
+ struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
+ gl_state_index tokens[STATE_LENGTH];
+ unsigned idx;
+ tokens[0] = s0;
+ tokens[1] = s1;
+ tokens[2] = s2;
+ tokens[3] = s3;
+ tokens[4] = s4;
+
+ for (idx = 0; idx < paramList->NumParameters; idx++) {
+ if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
+ memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
+ return src_reg(PROGRAM_STATE_VAR, idx);
+ }
+
+ idx = _mesa_add_state_reference( paramList, tokens );
+
+ /* Recalculate state dependency:
+ */
+ c->fp->param_state = paramList->StateFlags;
+
+ return src_reg(PROGRAM_STATE_VAR, idx);
+}
+
+
+static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
+ float s0,
+ float s1,
+ float s2,
+ float s3)
+{
+ struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
+ float values[4];
+ unsigned idx;
+ unsigned swizzle;
+
+ values[0] = s0;
+ values[1] = s1;
+ values[2] = s2;
+ values[3] = s3;
+
+ /* Have to search, otherwise multiple compilations will each grow
+ * the parameter list.
+ */
+ for (idx = 0; idx < paramList->NumParameters; idx++) {
+ if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
+ memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
+
+ /* XXX: this mimics the mesa bug which puts all constants and
+ * parameters into the "PROGRAM_STATE_VAR" category:
+ */
+ return src_reg(PROGRAM_STATE_VAR, idx);
+ }
+
+ idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
+ /* XXX what about swizzle? */
+ return src_reg(PROGRAM_STATE_VAR, idx);
+}
+
+
+
+/***********************************************************************
+ * Expand various instructions here to simpler forms.
+ */
+static void precalc_dst( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct prog_src_register src0 = inst->SrcReg[0];
+ struct prog_src_register src1 = inst->SrcReg[1];
+ struct prog_dst_register dst = inst->DstReg;
+
+ if (dst.WriteMask & WRITEMASK_Y) {
+ /* dst.y = mul src0.y, src1.y
+ */
+ emit_op(c,
+ OPCODE_MUL,
+ dst_mask(dst, WRITEMASK_Y),
+ inst->SaturateMode, 0, 0,
+ src0,
+ src1,
+ src_undef());
+ }
+
+
+ if (dst.WriteMask & WRITEMASK_XZ) {
+ unsigned z = GET_SWZ(src0.Swizzle, Z);
+
+ /* dst.xz = swz src0.1zzz
+ */
+ emit_op(c,
+ OPCODE_SWZ,
+ dst_mask(dst, WRITEMASK_XZ),
+ inst->SaturateMode, 0, 0,
+ src_swizzle(src0, SWIZZLE_ONE, z, z, z),
+ src_undef(),
+ src_undef());
+ }
+ if (dst.WriteMask & WRITEMASK_W) {
+ /* dst.w = mov src1.w
+ */
+ emit_op(c,
+ OPCODE_MOV,
+ dst_mask(dst, WRITEMASK_W),
+ inst->SaturateMode, 0, 0,
+ src1,
+ src_undef(),
+ src_undef());
+ }
+}
+
+
+static void precalc_lit( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct prog_src_register src0 = inst->SrcReg[0];
+ struct prog_dst_register dst = inst->DstReg;
+
+ if (dst.WriteMask & WRITEMASK_XW) {
+ /* dst.xw = swz src0.1111
+ */
+ emit_op(c,
+ OPCODE_SWZ,
+ dst_mask(dst, WRITEMASK_XW),
+ 0, 0, 0,
+ src_swizzle1(src0, SWIZZLE_ONE),
+ src_undef(),
+ src_undef());
+ }
+
+
+ if (dst.WriteMask & WRITEMASK_YZ) {
+ emit_op(c,
+ OPCODE_LIT,
+ dst_mask(dst, WRITEMASK_YZ),
+ inst->SaturateMode, 0, 0,
+ src0,
+ src_undef(),
+ src_undef());
+ }
+}
+
+static void precalc_tex( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct prog_src_register coord;
+ struct prog_dst_register tmpcoord;
+
+ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
+ struct prog_instruction *out;
+ struct prog_dst_register tmp0 = get_temp(c);
+ struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
+ struct prog_dst_register tmp1 = get_temp(c);
+ struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
+ struct prog_src_register src0 = inst->SrcReg[0];
+
+ tmpcoord = get_temp(c);
+ coord = src_reg_from_dst(tmpcoord);
+
+ out = emit_op(c, OPCODE_MOV,
+ tmpcoord,
+ 0, 0, 0,
+ src0,
+ src_undef(),
+ src_undef());
+ out->SrcReg[0].NegateBase = 0;
+ out->SrcReg[0].Abs = 1;
+
+ emit_op(c, OPCODE_MAX,
+ tmp0,
+ 0, 0, 0,
+ src_swizzle1(coord, X),
+ src_swizzle1(coord, Y),
+ src_undef());
+
+ emit_op(c, OPCODE_MAX,
+ tmp1,
+ 0, 0, 0,
+ tmp0src,
+ src_swizzle1(coord, Z),
+ src_undef());
+
+ emit_op(c, OPCODE_RCP,
+ tmp0,
+ 0, 0, 0,
+ tmp1src,
+ src_undef(),
+ src_undef());
+
+ emit_op(c, OPCODE_MUL,
+ tmpcoord,
+ 0, 0, 0,
+ src0,
+ tmp0src,
+ src_undef());
+
+ release_temp(c, tmp0);
+ release_temp(c, tmp1);
+ } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+ struct prog_src_register scale =
+ search_or_add_param5( c,
+ STATE_INTERNAL,
+ STATE_TEXRECT_SCALE,
+ inst->TexSrcUnit,
+ 0,0 );
+
+ tmpcoord = get_temp(c);
+
+ /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
+ */
+ emit_op(c,
+ OPCODE_MUL,
+ tmpcoord,
+ 0, 0, 0,
+ inst->SrcReg[0],
+ scale,
+ src_undef());
+
+ coord = src_reg_from_dst(tmpcoord);
+ }
+ else {
+ coord = inst->SrcReg[0];
+ }
+
+ /* Need to emit YUV texture conversions by hand. Probably need to
+ * do this here - the alternative is in brw_wm_emit.c, but the
+ * conversion requires allocating a temporary variable which we
+ * don't have the facility to do that late in the compilation.
+ */
+ if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) {
+ emit_op(c,
+ OPCODE_TEX,
+ inst->DstReg,
+ inst->SaturateMode,
+ inst->TexSrcUnit,
+ inst->TexSrcTarget,
+ coord,
+ src_undef(),
+ src_undef());
+ }
+ else {
+ /*
+ CONST C0 = { -.5, -.0625, -.5, 1.164 }
+ CONST C1 = { 1.596, -0.813, 2.018, -.391 }
+ UYV = TEX ...
+ UYV.xyz = ADD UYV, C0
+ UYV.y = MUL UYV.y, C0.w
+ RGB.xyz = MAD UYV.xxz, C1, UYV.y
+ RGB.y = MAD UYV.z, C1.w, RGB.y
+ */
+ struct prog_dst_register dst = inst->DstReg;
+ struct prog_src_register src0 = inst->SrcReg[0];
+ struct prog_dst_register tmp = get_temp(c);
+ struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
+ struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
+ struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
+
+ /* tmp = TEX ...
+ */
+ emit_op(c,
+ OPCODE_TEX,
+ tmp,
+ inst->SaturateMode,
+ inst->TexSrcUnit,
+ inst->TexSrcTarget,
+ src0,
+ src_undef(),
+ src_undef());
+
+ /* tmp.xyz = ADD TMP, C0
+ */
+ emit_op(c,
+ OPCODE_ADD,
+ dst_mask(tmp, WRITEMASK_XYZ),
+ 0, 0, 0,
+ tmpsrc,
+ C0,
+ src_undef());
+
+ /* YUV.y = MUL YUV.y, C0.w
+ */
+ emit_op(c,
+ OPCODE_MUL,
+ dst_mask(tmp, WRITEMASK_Y),
+ 0, 0, 0,
+ tmpsrc,
+ src_swizzle1(C0, W),
+ src_undef());
+
+ /* RGB.xyz = MAD YUV.xxz, C1, YUV.y
+ */
+ emit_op(c,
+ OPCODE_MAD,
+ dst_mask(dst, WRITEMASK_XYZ),
+ 0, 0, 0,
+ src_swizzle(tmpsrc, X,X,Z,Z),
+ C1,
+ src_swizzle1(tmpsrc, Y));
+
+ /* RGB.y = MAD YUV.z, C1.w, RGB.y
+ */
+ emit_op(c,
+ OPCODE_MAD,
+ dst_mask(dst, WRITEMASK_Y),
+ 0, 0, 0,
+ src_swizzle1(tmpsrc, Z),
+ src_swizzle1(C1, W),
+ src_swizzle1(src_reg_from_dst(dst), Y));
+
+ release_temp(c, tmp);
+ }
+
+ if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV)
+ release_temp(c, tmpcoord);
+}
+
+
+static boolean projtex( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct prog_src_register src = inst->SrcReg[0];
+
+ /* Only try to detect the simplest cases. Could detect (later)
+ * cases where we are trying to emit code like RCP {1.0}, MUL x,
+ * {1.0}, and so on.
+ *
+ * More complex cases than this typically only arise from
+ * user-provided fragment programs anyway:
+ */
+ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
+ return 0; /* ut2004 gun rendering !?! */
+ else if (src.File == PROGRAM_INPUT &&
+ GET_SWZ(src.Swizzle, W) == W &&
+ (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
+ return 0;
+ else
+ return 1;
+}
+
+
+static void precalc_txp( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct prog_src_register src0 = inst->SrcReg[0];
+
+ if (projtex(c, inst)) {
+ struct prog_dst_register tmp = get_temp(c);
+ struct prog_instruction tmp_inst;
+
+ /* tmp0.w = RCP inst.arg[0][3]
+ */
+ emit_op(c,
+ OPCODE_RCP,
+ dst_mask(tmp, WRITEMASK_W),
+ 0, 0, 0,
+ src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
+ src_undef(),
+ src_undef());
+
+ /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
+ */
+ emit_op(c,
+ OPCODE_MUL,
+ dst_mask(tmp, WRITEMASK_XYZ),
+ 0, 0, 0,
+ src0,
+ src_swizzle1(src_reg_from_dst(tmp), W),
+ src_undef());
+
+ /* dst = precalc(TEX tmp0)
+ */
+ tmp_inst = *inst;
+ tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
+ precalc_tex(c, &tmp_inst);
+
+ release_temp(c, tmp);
+ }
+ else
+ {
+ /* dst = precalc(TEX src0)
+ */
+ precalc_tex(c, inst);
+ }
+}
+
+
+
+
+
+/***********************************************************************
+ * Add instructions to perform fog blending
+ */
+
+static void fog_blend( struct brw_wm_compile *c,
+ struct prog_src_register fog_factor )
+{
+ struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
+ struct prog_src_register fogcolor = search_or_add_param5( c, STATE_FOG_COLOR, 0,0,0,0 );
+
+ /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
+
+ emit_op(c,
+ OPCODE_LRP,
+ dst_mask(outcolor, WRITEMASK_XYZ),
+ 0, 0, 0,
+ fog_factor,
+ src_reg_from_dst(outcolor),
+ fogcolor);
+}
+
+
+
+/* This one is simple - just take the interpolated fog coordinate and
+ * use it as the fog blend factor.
+ */
+static void fog_interpolated( struct brw_wm_compile *c )
+{
+ struct prog_src_register fogc = src_reg(PROGRAM_INPUT, FRAG_ATTRIB_FOGC);
+
+ if (!(c->fp_interp_emitted & (1<<FRAG_ATTRIB_FOGC)))
+ emit_interp(c, FRAG_ATTRIB_FOGC);
+
+ fog_blend( c, src_swizzle1(fogc, GET_SWZ(fogc.Swizzle,X)));
+}
+
+static void emit_fog( struct brw_wm_compile *c )
+{
+ if (!c->fp->program.FogOption)
+ return;
+
+ if (1)
+ fog_interpolated( c );
+ else {
+ /* TODO: per-pixel fog */
+ assert(0);
+ }
+}
+
+static void emit_fb_write( struct brw_wm_compile *c )
+{
+ struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
+ struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+ struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
+
+ emit_op(c,
+ WM_FB_WRITE,
+ dst_mask(dst_undef(),0),
+ 0, 0, 0,
+ outcolor,
+ payload_r0_depth,
+ outdepth);
+}
+
+
+
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
+static void validate_src_regs( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ unsigned nr_args = brw_wm_nr_args( inst->Opcode );
+ unsigned i;
+
+ for (i = 0; i < nr_args; i++) {
+ if (inst->SrcReg[i].File == PROGRAM_INPUT) {
+ unsigned idx = inst->SrcReg[i].Index;
+ if (!(c->fp_interp_emitted & (1<<idx))) {
+ emit_interp(c, idx);
+ }
+ }
+ }
+}
+
+
+
+static void print_insns( const struct prog_instruction *insn,
+ unsigned nr )
+{
+ unsigned i;
+ for (i = 0; i < nr; i++, insn++) {
+ _mesa_printf("%3d: ", i);
+ if (insn->Opcode < MAX_OPCODE)
+ _mesa_print_instruction(insn);
+ else if (insn->Opcode < MAX_WM_OPCODE) {
+ unsigned idx = insn->Opcode - MAX_OPCODE;
+
+ _mesa_print_alu_instruction(insn,
+ wm_opcode_strings[idx],
+ 3);
+ }
+ else
+ _mesa_printf("UNKNOWN\n");
+
+ }
+}
+void brw_wm_pass_fp( struct brw_wm_compile *c )
+{
+ struct brw_fragment_program *fp = c->fp;
+ unsigned insn;
+ if (BRW_DEBUG & DEBUG_WM) {
+ _mesa_printf("\n\n\npre-fp:\n");
+ _mesa_print_program(&fp->program.Base);
+ _mesa_printf("\n");
+ }
+
+ c->pixel_xy = src_undef();
+ c->delta_xy = src_undef();
+ c->pixel_w = src_undef();
+ c->nr_fp_insns = 0;
+
+ /* Emit preamble instructions:
+ */
+
+
+ for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
+ const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
+ struct prog_instruction *out;
+
+ /* Check for INPUT values, emit INTERP instructions where
+ * necessary:
+ */
+ validate_src_regs(c, inst);
+
+
+ switch (inst->Opcode) {
+ case OPCODE_SWZ:
+ out = emit_insn(c, inst);
+ out->Opcode = OPCODE_MOV;
+ break;
+
+ case OPCODE_ABS:
+ out = emit_insn(c, inst);
+ out->Opcode = OPCODE_MOV;
+ out->SrcReg[0].NegateBase = 0;
+ out->SrcReg[0].Abs = 1;
+ break;
+
+ case OPCODE_SUB:
+ out = emit_insn(c, inst);
+ out->Opcode = OPCODE_ADD;
+ out->SrcReg[1].NegateBase ^= 0xf;
+ break;
+
+ case OPCODE_SCS:
+ out = emit_insn(c, inst);
+ /* This should probably be done in the parser.
+ */
+ out->DstReg.WriteMask &= WRITEMASK_XY;
+ break;
+
+ case OPCODE_DST:
+ precalc_dst(c, inst);
+ break;
+
+ case OPCODE_LIT:
+ precalc_lit(c, inst);
+ break;
+
+ case OPCODE_TXP:
+ precalc_txp(c, inst);
+ break;
+
+ case OPCODE_XPD:
+ out = emit_insn(c, inst);
+ /* This should probably be done in the parser.
+ */
+ out->DstReg.WriteMask &= WRITEMASK_XYZ;
+ break;
+
+ case OPCODE_KIL:
+ out = emit_insn(c, inst);
+ /* This should probably be done in the parser.
+ */
+ out->DstReg.WriteMask = 0;
+ break;
+ case OPCODE_DDX:
+ emit_ddx(c, inst);
+ break;
+ case OPCODE_DDY:
+ emit_ddy(c, inst);
+ break;
+ case OPCODE_END:
+ emit_fog(c);
+ emit_fb_write(c);
+ break;
+ case OPCODE_PRINT:
+ break;
+
+ default:
+ emit_insn(c, inst);
+ break;
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ _mesa_printf("\n\n\npass_fp:\n");
+ print_insns( c->prog_instructions, c->nr_fp_insns );
+ _mesa_printf("\n");
+ }
+}
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_glsl.c b/src/mesa/pipe/i965simple/brw_wm_glsl.c
new file mode 100644
index 0000000000..90e73a605a
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_glsl.c
@@ -0,0 +1,1356 @@
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+
+#if 0
+
+/* Only guess, need a flag in gl_fragment_program later */
+boolean brw_wm_is_glsl(struct gl_fragment_program *fp)
+{
+ int i;
+ for (i = 0; i < fp->Base.NumInstructions; i++) {
+ struct prog_instruction *inst = &fp->Base.Instructions[i];
+ switch (inst->Opcode) {
+ case OPCODE_IF:
+ case OPCODE_INT:
+ case OPCODE_ENDIF:
+ case OPCODE_CAL:
+ case OPCODE_BRK:
+ case OPCODE_RET:
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ case OPCODE_BGNLOOP:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+ return FALSE;
+}
+
+static void set_reg(struct brw_wm_compile *c, int file, int index,
+ int component, struct brw_reg reg)
+{
+ c->wm_regs[file][index][component].reg = reg;
+ c->wm_regs[file][index][component].inited = TRUE;
+}
+
+static int get_scalar_dst_index(struct prog_instruction *inst)
+{
+ int i;
+ for (i = 0; i < 4; i++)
+ if (inst->DstReg.WriteMask & (1<<i))
+ break;
+ return i;
+}
+
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+ struct brw_reg reg;
+ reg = brw_vec8_grf(c->tmp_index--, 0);
+ return reg;
+}
+
+static void release_tmps(struct brw_wm_compile *c)
+{
+ c->tmp_index = 127;
+}
+
+static struct brw_reg
+get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, unsigned neg, unsigned abs)
+{
+ struct brw_reg reg;
+ switch (file) {
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM:
+ file = PROGRAM_STATE_VAR;
+ break;
+ case PROGRAM_UNDEFINED:
+ return brw_null_reg();
+ default:
+ break;
+ }
+
+ if(c->wm_regs[file][index][component].inited)
+ reg = c->wm_regs[file][index][component].reg;
+ else
+ reg = brw_vec8_grf(c->reg_index, 0);
+
+ if(!c->wm_regs[file][index][component].inited) {
+ set_reg(c, file, index, component, reg);
+ c->reg_index++;
+ }
+
+ if (neg & (1<< component)) {
+ reg = negate(reg);
+ }
+ if (abs)
+ reg = brw_abs(reg);
+ return reg;
+}
+
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+ int i, j;
+ struct brw_reg reg;
+ int nr_interp_regs = 0;
+ unsigned inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+
+ for (i = 0; i < 4; i++) {
+ reg = (i < c->key.nr_depth_regs)
+ ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
+ set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
+ }
+ c->reg_index += 2*c->key.nr_depth_regs;
+ {
+ int nr_params = c->fp->program.Base.Parameters->NumParameters;
+ struct gl_program_parameter_list *plist =
+ c->fp->program.Base.Parameters;
+ int index = 0;
+ c->prog_data.nr_params = 4*nr_params;
+ for (i = 0; i < nr_params; i++) {
+ for (j = 0; j < 4; j++, index++) {
+ reg = brw_vec1_grf(c->reg_index + index/8,
+ index%8);
+ c->prog_data.param[index] =
+ &plist->ParameterValues[i][j];
+ set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+ }
+ }
+ c->nr_creg = 2*((4*nr_params+15)/16);
+ c->reg_index += c->nr_creg;
+ }
+ for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
+ if (inputs & (1<<i)) {
+ nr_interp_regs++;
+ reg = brw_vec8_grf(c->reg_index, 0);
+ for (j = 0; j < 4; j++)
+ set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
+ c->reg_index += 2;
+
+ }
+ }
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = nr_interp_regs * 2;
+ c->prog_data.curb_read_length = c->nr_creg;
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+ c->reg_index++;
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+ c->reg_index += 2;
+}
+
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ struct prog_instruction *inst, int component, int nr)
+{
+ return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
+ 0, 0);
+}
+
+static struct brw_reg get_src_reg(struct brw_wm_compile *c,
+ struct prog_src_register *src, int index, int nr)
+{
+ int component = GET_SWZ(src->Swizzle, index);
+ return get_reg(c, src->File, src->Index, component, nr,
+ src->NegateBase, src->Abs);
+}
+
+static void emit_abs( struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ for (i = 0; i < 4; i++) {
+ if (inst->DstReg.WriteMask & (1<<i)) {
+ struct brw_reg src, dst;
+ dst = get_dst_reg(c, inst, i, 1);
+ src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ brw_MOV(p, dst, brw_abs(src));
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_int( struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg src, dst;
+ dst = get_dst_reg(c, inst, i, 1) ;
+ src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ brw_RNDD(p, dst, src);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_mov( struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg src, dst;
+ dst = get_dst_reg(c, inst, i, 1);
+ src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ brw_MOV(p, dst, src);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_pixel_xy(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+
+ struct brw_reg dst0, dst1;
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+
+ dst0 = get_dst_reg(c, inst, 0, 1);
+ dst1 = get_dst_reg(c, inst, 1, 1);
+ /* Calculate pixel centers by adding 1 or 0 to each of the
+ * micro-tile coordinates passed in r1.
+ */
+ if (mask & WRITEMASK_X) {
+ brw_ADD(p,
+ vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 4), 2, 4, 0),
+ brw_imm_v(0x10101010));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ brw_ADD(p,
+ vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 5), 2, 4, 0),
+ brw_imm_v(0x11001100));
+ }
+
+}
+
+static void emit_delta_xy(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg dst0, dst1, src0, src1;
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+
+ dst0 = get_dst_reg(c, inst, 0, 1);
+ dst1 = get_dst_reg(c, inst, 1, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1);
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ if (mask & WRITEMASK_X) {
+ brw_ADD(p,
+ dst0,
+ retype(src0, BRW_REGISTER_TYPE_UW),
+ negate(r1));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ brw_ADD(p,
+ dst1,
+ retype(src1, BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
+
+ }
+
+}
+
+
+static void fire_fb_write( struct brw_wm_compile *c,
+ unsigned base_reg,
+ unsigned nr )
+{
+ struct brw_compile *p = &c->func;
+
+ /* Pass through control information:
+ */
+ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+ brw_MOV(p,
+ brw_message_reg(base_reg + 1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+ }
+ /* Send framebuffer write message: */
+ brw_fb_WRITE(p,
+ retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ base_reg,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ 0, /* render surface always 0 */
+ nr,
+ 0,
+ 1);
+}
+
+static void emit_fb_write(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ int nr = 2;
+ int channel;
+ struct brw_reg src0;//, src1, src2, dst;
+
+ /* Reserve a space for AA - may not be needed:
+ */
+ if (c->key.aa_dest_stencil_reg)
+ nr += 1;
+ {
+ brw_push_insn_state(p);
+ for (channel = 0; channel < 4; channel++) {
+ src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1);
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+ brw_MOV(p, brw_message_reg(nr + channel), src0);
+ }
+ /* skip over the regs populated above: */
+ nr += 8;
+ brw_pop_insn_state(p);
+ }
+ fire_fb_write(c, 0, nr);
+}
+
+static void emit_pixel_w( struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ if (mask & WRITEMASK_W) {
+ struct brw_reg dst, src0, delta0, delta1;
+ struct brw_reg interp3;
+
+ dst = get_dst_reg(c, inst, 3, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+ delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+
+ interp3 = brw_vec1_grf(src0.nr+1, 4);
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ brw_LINE(p, brw_null_reg(), interp3, delta0);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
+
+ /* Calc w */
+ brw_math_16( p, dst,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+
+static void emit_linterp(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ struct brw_reg interp[4];
+ struct brw_reg dst, delta0, delta1;
+ struct brw_reg src0;
+
+ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+ delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+ unsigned nr = src0.nr;
+ int i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_LINE(p, brw_null_reg(), interp[i], delta0);
+ brw_MAC(p, dst, suboffset(interp[i],1), delta1);
+ }
+ }
+}
+
+static void emit_cinterp(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+
+ struct brw_reg interp[4];
+ struct brw_reg dst, src0;
+
+ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ unsigned nr = src0.nr;
+ int i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV(p, dst, suboffset(interp[i],3));
+ }
+ }
+}
+
+static void emit_pinterp(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+
+ struct brw_reg interp[4];
+ struct brw_reg dst, delta0, delta1;
+ struct brw_reg src0, w;
+
+ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+ delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+ w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
+ unsigned nr = src0.nr;
+ int i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_LINE(p, brw_null_reg(), interp[i], delta0);
+ brw_MAC(p, dst, suboffset(interp[i],1),
+ delta1);
+ brw_MUL(p, dst, dst, w);
+ }
+ }
+}
+
+static void emit_xpd(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ for (i = 0; i < 4; i++) {
+ unsigned i2 = (i+2)%3;
+ unsigned i1 = (i+1)%3;
+ if (mask & (1<<i)) {
+ struct brw_reg src0, src1, dst;
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
+ src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
+ brw_MUL(p, brw_null_reg(), src0, src1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ brw_MAC(p, dst, src0, src1);
+ brw_set_saturate(p, 0);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dp3(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_reg src0[3], src1[3], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ for (i = 0; i < 3; i++) {
+ src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ }
+
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dp4(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ for (i = 0; i < 4; i++) {
+ src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ }
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MAC(p, dst, src0[3], src1[3]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dph(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ for (i = 0; i < 4; i++) {
+ src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ }
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_ADD(p, dst, src0[3], src1[3]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_math1(struct brw_wm_compile *c,
+ struct prog_instruction *inst, unsigned func)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+
+ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ brw_MOV(p, brw_message_reg(2), src0);
+ brw_math(p,
+ dst,
+ func,
+ (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+static void emit_rcp(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
+}
+
+static void emit_rsq(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+}
+
+static void emit_sin(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
+}
+
+static void emit_cos(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+}
+
+static void emit_ex2(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
+}
+
+static void emit_lg2(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
+}
+
+static void emit_add(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, dst;
+ unsigned mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ brw_ADD(p, dst, src0, src1);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_sub(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, dst;
+ unsigned mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ brw_ADD(p, dst, src0, negate(src1));
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_mul(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, dst;
+ unsigned mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ brw_MUL(p, dst, src0, src1);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_frc(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ unsigned mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ brw_FRC(p, dst, src0);
+ }
+ }
+ if (inst->SaturateMode != SATURATE_OFF)
+ brw_set_saturate(p, 0);
+}
+
+static void emit_flr(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ unsigned mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ brw_RNDD(p, dst, src0);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_max(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ struct brw_reg src0, src1, dst;
+ int i;
+ brw_push_insn_state(p);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MOV(p, dst, src0);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, src1);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+ brw_pop_insn_state(p);
+}
+
+static void emit_min(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ struct brw_reg src0, src1, dst;
+ int i;
+ brw_push_insn_state(p);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MOV(p, dst, src0);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, src1);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+ brw_pop_insn_state(p);
+}
+
+static void emit_pow(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst, src0, src1;
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+
+ brw_MOV(p, brw_message_reg(2), src0);
+ brw_MOV(p, brw_message_reg(3), src1);
+
+ brw_math(p,
+ dst,
+ BRW_MATH_FUNCTION_POW,
+ (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+static void emit_lrp(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
+ int i;
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+
+ src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+
+ if (src1.nr == dst.nr) {
+ tmp1 = alloc_tmp(c);
+ brw_MOV(p, tmp1, src1);
+ } else
+ tmp1 = src1;
+
+ src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+ if (src2.nr == dst.nr) {
+ tmp2 = alloc_tmp(c);
+ brw_MOV(p, tmp2, src2);
+ } else
+ tmp2 = src2;
+
+ brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst, tmp2);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MAC(p, dst, src0, tmp1);
+ brw_set_saturate(p, 0);
+ }
+ release_tmps(c);
+ }
+}
+
+static void emit_kil(struct brw_wm_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, depth, c->emit_mask_reg, depth);
+ brw_pop_insn_state(p);
+}
+
+static void emit_mad(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ struct brw_reg dst, src0, src1, src2;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+ brw_MUL(p, dst, src0, src1);
+
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_ADD(p, dst, dst, src2);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+static void emit_sop(struct brw_wm_compile *c,
+ struct prog_instruction *inst, unsigned cond)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ struct brw_reg dst, src0, src1;
+ int i;
+
+ brw_push_insn_state(p);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ brw_CMP(p, brw_null_reg(), cond, src0, src1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst, brw_imm_f(0.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, brw_imm_f(1.0));
+ }
+ }
+ brw_pop_insn_state(p);
+}
+
+static void emit_slt(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_L);
+}
+
+static void emit_sle(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_LE);
+}
+
+static void emit_sgt(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_G);
+}
+
+static void emit_sge(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_GE);
+}
+
+static void emit_seq(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_EQ);
+}
+
+static void emit_sne(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+}
+
+static void emit_ddx(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ struct brw_reg interp[4];
+ struct brw_reg dst;
+ struct brw_reg src0, w;
+ unsigned nr, i;
+ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
+ nr = src0.nr;
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV(p, dst, interp[i]);
+ brw_MUL(p, dst, dst, w);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_ddy(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ struct brw_reg interp[4];
+ struct brw_reg dst;
+ struct brw_reg src0, w;
+ unsigned nr, i;
+
+ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ nr = src0.nr;
+ w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV(p, dst, suboffset(interp[i], 1));
+ brw_MUL(p, dst, dst, w);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_wpos_xy(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->DstReg.WriteMask;
+ struct brw_reg src0[2], dst[2];
+
+ dst[0] = get_dst_reg(c, inst, 0, 1);
+ dst[1] = get_dst_reg(c, inst, 1, 1);
+
+ src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1);
+
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ if (mask & WRITEMASK_X) {
+ brw_MOV(p,
+ dst[0],
+ retype(src0[0], BRW_REGISTER_TYPE_UW));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ /* TODO -- window_height - Y */
+ brw_MOV(p,
+ dst[1],
+ retype(src0[1], BRW_REGISTER_TYPE_UW));
+
+ }
+}
+
+/* TODO
+ BIAS on SIMD8 not workind yet...
+ */
+static void emit_txb(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst[4], src[4], payload_reg;
+ unsigned i;
+ payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i, 1);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+
+ switch (inst->TexSrcTarget) {
+ case TEXTURE_1D_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ break;
+ default:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), src[2]);
+ break;
+ }
+ brw_MOV(p, brw_message_reg(5), src[3]);
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(payload_reg, BRW_REGISTER_TYPE_UW),
+ inst->TexSrcUnit + 1, /* surface */
+ inst->TexSrcUnit, /* sampler */
+ inst->DstReg.WriteMask,
+ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+ 4,
+ 4,
+ 0);
+}
+
+static void emit_tex(struct brw_wm_compile *c,
+ struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst[4], src[4], payload_reg;
+ unsigned msg_len;
+ unsigned i, nr;
+ unsigned emit;
+ boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0;
+
+ payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i, 1);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+
+
+ switch (inst->TexSrcTarget) {
+ case TEXTURE_1D_INDEX:
+ emit = WRITEMASK_X;
+ nr = 1;
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ emit = WRITEMASK_XY;
+ nr = 2;
+ break;
+ default:
+ emit = WRITEMASK_XYZ;
+ nr = 3;
+ break;
+ }
+ msg_len = 1;
+
+ for (i = 0; i < nr; i++) {
+ static const unsigned swz[4] = {0,1,2,2};
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
+ else
+ brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
+ msg_len += 1;
+ }
+
+ if (shadow) {
+ brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(6), src[2]);
+ }
+
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(payload_reg, BRW_REGISTER_TYPE_UW),
+ inst->TexSrcUnit + 1, /* surface */
+ inst->TexSrcUnit, /* sampler */
+ inst->DstReg.WriteMask,
+ BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
+ 4,
+ shadow ? 6 : 4,
+ 0);
+
+ if (shadow)
+ brw_MOV(p, dst[3], brw_imm_f(1.0));
+}
+
+static void post_wm_emit( struct brw_wm_compile *c )
+{
+ unsigned nr_insns = c->fp->program.Base.NumInstructions;
+ unsigned insn, target_insn;
+ struct prog_instruction *inst1, *inst2;
+ struct brw_instruction *brw_inst1, *brw_inst2;
+ int offset;
+ for (insn = 0; insn < nr_insns; insn++) {
+ inst1 = &c->fp->program.Base.Instructions[insn];
+ brw_inst1 = inst1->Data;
+ switch (inst1->Opcode) {
+ case OPCODE_CAL:
+ target_insn = inst1->BranchTarget;
+ inst2 = &c->fp->program.Base.Instructions[target_insn];
+ brw_inst2 = inst2->Data;
+ offset = brw_inst2 - brw_inst1;
+ brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void brw_wm_emit_glsl(struct brw_wm_compile *c)
+
+{
+#define MAX_IFSN 32
+#define MAX_LOOP_DEPTH 32
+ struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
+ struct brw_instruction *inst0, *inst1;
+ int i, if_insn = 0, loop_insn = 0;
+ struct brw_compile *p = &c->func;
+ struct brw_indirect stack_index = brw_indirect(0, 0);
+
+ brw_init_compile(&c->func);
+ c->reg_index = 0;
+ prealloc_reg(c);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ struct prog_instruction *inst = &c->prog_instructions[i];
+ struct prog_instruction *orig_inst;
+
+ if ((orig_inst = inst->Data) != 0)
+ orig_inst->Data = current_insn(p);
+
+ if (inst->CondUpdate)
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+
+ switch (inst->Opcode) {
+ case WM_PIXELXY:
+ emit_pixel_xy(c, inst);
+ break;
+ case WM_DELTAXY:
+ emit_delta_xy(c, inst);
+ break;
+ case WM_PIXELW:
+ emit_pixel_w(c, inst);
+ break;
+ case WM_LINTERP:
+ emit_linterp(c, inst);
+ break;
+ case WM_PINTERP:
+ emit_pinterp(c, inst);
+ break;
+ case WM_CINTERP:
+ emit_cinterp(c, inst);
+ break;
+ case WM_WPOSXY:
+ emit_wpos_xy(c, inst);
+ break;
+ case WM_FB_WRITE:
+ emit_fb_write(c, inst);
+ break;
+ case OPCODE_ABS:
+ emit_abs(c, inst);
+ break;
+ case OPCODE_ADD:
+ emit_add(c, inst);
+ break;
+ case OPCODE_SUB:
+ emit_sub(c, inst);
+ break;
+ case OPCODE_FRC:
+ emit_frc(c, inst);
+ break;
+ case OPCODE_FLR:
+ emit_flr(c, inst);
+ break;
+ case OPCODE_LRP:
+ emit_lrp(c, inst);
+ break;
+ case OPCODE_INT:
+ emit_int(c, inst);
+ break;
+ case OPCODE_MOV:
+ emit_mov(c, inst);
+ break;
+ case OPCODE_DP3:
+ emit_dp3(c, inst);
+ break;
+ case OPCODE_DP4:
+ emit_dp4(c, inst);
+ break;
+ case OPCODE_XPD:
+ emit_xpd(c, inst);
+ break;
+ case OPCODE_DPH:
+ emit_dph(c, inst);
+ break;
+ case OPCODE_RCP:
+ emit_rcp(c, inst);
+ break;
+ case OPCODE_RSQ:
+ emit_rsq(c, inst);
+ break;
+ case OPCODE_SIN:
+ emit_sin(c, inst);
+ break;
+ case OPCODE_COS:
+ emit_cos(c, inst);
+ break;
+ case OPCODE_EX2:
+ emit_ex2(c, inst);
+ break;
+ case OPCODE_LG2:
+ emit_lg2(c, inst);
+ break;
+ case OPCODE_MAX:
+ emit_max(c, inst);
+ break;
+ case OPCODE_MIN:
+ emit_min(c, inst);
+ break;
+ case OPCODE_DDX:
+ emit_ddx(c, inst);
+ break;
+ case OPCODE_DDY:
+ emit_ddy(c, inst);
+ break;
+ case OPCODE_SLT:
+ emit_slt(c, inst);
+ break;
+ case OPCODE_SLE:
+ emit_sle(c, inst);
+ break;
+ case OPCODE_SGT:
+ emit_sgt(c, inst);
+ break;
+ case OPCODE_SGE:
+ emit_sge(c, inst);
+ break;
+ case OPCODE_SEQ:
+ emit_seq(c, inst);
+ break;
+ case OPCODE_SNE:
+ emit_sne(c, inst);
+ break;
+ case OPCODE_MUL:
+ emit_mul(c, inst);
+ break;
+ case OPCODE_POW:
+ emit_pow(c, inst);
+ break;
+ case OPCODE_MAD:
+ emit_mad(c, inst);
+ break;
+ case OPCODE_TEX:
+ emit_tex(c, inst);
+ break;
+ case OPCODE_TXB:
+ emit_txb(c, inst);
+ break;
+ case OPCODE_KIL_NV:
+ emit_kil(c);
+ break;
+ case OPCODE_IF:
+ assert(if_insn < MAX_IFSN);
+ if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_ELSE:
+ if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ break;
+ case OPCODE_ENDIF:
+ assert(if_insn > 0);
+ brw_ENDIF(p, if_inst[--if_insn]);
+ break;
+ case OPCODE_BGNSUB:
+ case OPCODE_ENDSUB:
+ break;
+ case OPCODE_CAL:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(4));
+ orig_inst = inst->Data;
+ orig_inst->Data = &p->store[p->nr_insn];
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_pop_insn_state(p);
+ break;
+
+ case OPCODE_RET:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_pop_insn_state(p);
+
+ break;
+ case OPCODE_BGNLOOP:
+ loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_BRK:
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_CONT:
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_ENDLOOP:
+ loop_insn--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
+ /* patch all the BREAK instructions from
+ last BEGINLOOP */
+ while (inst0 > loop_inst[loop_insn]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ inst0->bits3.if_else.pop_count = 0;
+ } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = inst1 - inst0;
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ break;
+ default:
+ _mesa_printf("unsupported IR in fragment shader %d\n",
+ inst->Opcode);
+ }
+ if (inst->CondUpdate)
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ else
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ post_wm_emit(c);
+ for (i = 0; i < c->fp->program.Base.NumInstructions; i++)
+ c->fp->program.Base.Instructions[i].Data = NULL;
+}
+
+void brw_wm_glsl_emit(struct brw_wm_compile *c)
+{
+ brw_wm_pass_fp(c);
+ c->tmp_index = 127;
+ brw_wm_emit_glsl(c);
+ c->prog_data.total_grf = c->reg_index;
+ c->prog_data.total_scratch = 0;
+}
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_iz.c b/src/mesa/pipe/i965simple/brw_wm_iz.c
new file mode 100644
index 0000000000..6c5f25bf39
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_iz.c
@@ -0,0 +1,214 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_wm.h"
+
+
+#undef P /* prompted depth */
+#undef C /* computed */
+#undef N /* non-promoted? */
+
+#define P 0
+#define C 1
+#define N 2
+
+const struct {
+ unsigned mode:2;
+ unsigned sd_present:1;
+ unsigned sd_to_rt:1;
+ unsigned dd_present:1;
+ unsigned ds_present:1;
+} wm_iz_table[IZ_BIT_MAX] =
+{
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 1, 1, 0, 0 },
+ { C, 1, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 1, 1, 0, 0 },
+ { C, 1, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 0, 0, 1 },
+ { C, 0, 0, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 1, 1, 0, 1 },
+ { C, 1, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 0, 0, 1 },
+ { C, 0, 0, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 1, 1, 0, 1 },
+ { C, 1, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 0, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 }
+};
+
+void brw_wm_lookup_iz( unsigned line_aa,
+ unsigned lookup,
+ struct brw_wm_prog_key *key )
+{
+ unsigned reg = 2;
+
+ assert (lookup < IZ_BIT_MAX);
+
+ if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
+ key->computes_depth = 1;
+
+ if (wm_iz_table[lookup].sd_present) {
+ key->source_depth_reg = reg;
+ reg += 2;
+ }
+
+ if (wm_iz_table[lookup].sd_to_rt)
+ key->source_depth_to_render_target = 1;
+
+ if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
+ key->aa_dest_stencil_reg = reg;
+ key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
+ line_aa == AA_SOMETIMES);
+ reg++;
+ }
+
+ if (wm_iz_table[lookup].dd_present) {
+ key->dest_depth_reg = reg;
+ reg+=2;
+ }
+
+ key->nr_depth_regs = (reg+1)/2;
+}
+
diff --git a/src/mesa/pipe/i965simple/brw_wm_pass0.c b/src/mesa/pipe/i965simple/brw_wm_pass0.c
new file mode 100644
index 0000000000..2ec3ac00c1
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_pass0.c
@@ -0,0 +1,466 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+#if 0
+/***********************************************************************
+ */
+
+static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
+{
+ assert(c->nr_refs < BRW_WM_MAX_REF);
+ return &c->refs[c->nr_refs++];
+}
+
+static struct brw_wm_value *get_value( struct brw_wm_compile *c)
+{
+ assert(c->nr_refs < BRW_WM_MAX_VREG);
+ return &c->vreg[c->nr_vreg++];
+}
+
+static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
+{
+ assert(c->nr_insns < BRW_WM_MAX_INSN);
+ return &c->instruction[c->nr_insns++];
+}
+
+/***********************************************************************
+ */
+
+static void pass0_init_undef( struct brw_wm_compile *c)
+{
+ struct brw_wm_ref *ref = &c->undef_ref;
+ ref->value = &c->undef_value;
+ ref->hw_reg = brw_vec8_grf(0, 0);
+ ref->insn = 0;
+ ref->prevuse = NULL;
+}
+
+static void pass0_set_fpreg_value( struct brw_wm_compile *c,
+ unsigned file,
+ unsigned idx,
+ unsigned component,
+ struct brw_wm_value *value )
+{
+ struct brw_wm_ref *ref = get_ref(c);
+ ref->value = value;
+ ref->hw_reg = brw_vec8_grf(0, 0);
+ ref->insn = 0;
+ ref->prevuse = NULL;
+ c->pass0_fp_reg[file][idx][component] = ref;
+}
+
+static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
+ unsigned file,
+ unsigned idx,
+ unsigned component,
+ const struct brw_wm_ref *src_ref )
+{
+ c->pass0_fp_reg[file][idx][component] = src_ref;
+}
+
+static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
+ const float *param_ptr )
+{
+ unsigned i = c->prog_data.nr_params++;
+
+ if (i >= BRW_WM_MAX_PARAM) {
+ _mesa_printf("%s: out of params\n", __FUNCTION__);
+ c->prog_data.error = 1;
+ return NULL;
+ }
+ else {
+ struct brw_wm_ref *ref = get_ref(c);
+
+ c->prog_data.param[i] = param_ptr;
+ c->nr_creg = (i+16)/16;
+
+ /* Push the offsets into hw_reg. These will be added to the
+ * real register numbers once one is allocated in pass2.
+ */
+ ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8);
+ ref->value = &c->creg[i/16];
+ ref->insn = 0;
+ ref->prevuse = NULL;
+
+ return ref;
+ }
+}
+
+
+static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
+ const float *constval )
+{
+ unsigned i;
+
+ /* Search for an existing const value matching the request:
+ */
+ for (i = 0; i < c->nr_constrefs; i++) {
+ if (c->constref[i].constval == *constval)
+ return c->constref[i].ref;
+ }
+
+ /* Else try to add a new one:
+ */
+ if (c->nr_constrefs < BRW_WM_MAX_CONST) {
+ unsigned i = c->nr_constrefs++;
+
+ /* A constant is a special type of parameter:
+ */
+ c->constref[i].constval = *constval;
+ c->constref[i].ref = get_param_ref(c, constval);
+
+ return c->constref[i].ref;
+ }
+ else {
+ _mesa_printf("%s: out of constrefs\n", __FUNCTION__);
+ c->prog_data.error = 1;
+ return NULL;
+ }
+}
+
+
+/* Lookup our internal registers
+ */
+static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
+ unsigned file,
+ unsigned idx,
+ unsigned component )
+{
+ const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component];
+
+ if (!ref) {
+ switch (file) {
+ case PROGRAM_INPUT:
+ case PROGRAM_PAYLOAD:
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_OUTPUT:
+ case PROGRAM_VARYING:
+ break;
+
+ case PROGRAM_LOCAL_PARAM:
+ ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]);
+ break;
+
+ case PROGRAM_ENV_PARAM:
+ ref = get_param_ref(c, &c->env_param[idx][component]);
+ break;
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_UNIFORM:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_NAMED_PARAM: {
+ struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
+
+ /* There's something really hokey about parameters parsed in
+ * arb programs - they all end up in here, whether they be
+ * state values, paramters or constants. This duplicates the
+ * structure above & also seems to subvert the limits set for
+ * each type of constant/param.
+ */
+ switch (plist->Parameters[idx].Type) {
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ /* These are invarient:
+ */
+ ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+ break;
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_UNIFORM:
+ /* These may change from run to run:
+ */
+ ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ }
+
+ default:
+ assert(0);
+ break;
+ }
+
+ c->pass0_fp_reg[file][idx][component] = ref;
+ }
+
+ if (!ref)
+ ref = &c->undef_ref;
+
+ return ref;
+}
+
+
+
+
+/***********************************************************************
+ * Straight translation to internal instruction format
+ */
+
+static void pass0_set_dst( struct brw_wm_compile *c,
+ struct brw_wm_instruction *out,
+ const struct prog_instruction *inst,
+ unsigned writemask )
+{
+ const struct prog_dst_register *dst = &inst->DstReg;
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ out->dst[i] = get_value(c);
+
+ pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]);
+ }
+ }
+
+ out->writemask = writemask;
+}
+
+
+static void pass0_set_dst_scalar( struct brw_wm_compile *c,
+ struct brw_wm_instruction *out,
+ const struct prog_instruction *inst,
+ unsigned writemask )
+{
+ if (writemask) {
+ const struct prog_dst_register *dst = &inst->DstReg;
+ unsigned i;
+
+ /* Compute only the first (X) value:
+ */
+ out->writemask = WRITEMASK_X;
+ out->dst[0] = get_value(c);
+
+ /* Update our tracking register file for all the components in
+ * writemask:
+ */
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[0]);
+ }
+ }
+ }
+ else
+ out->writemask = 0;
+}
+
+
+
+static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
+ struct prog_src_register src,
+ unsigned i )
+{
+ unsigned component = GET_SWZ(src.Swizzle,i);
+ const struct brw_wm_ref *src_ref;
+ static const float const_zero = 0.0;
+ static const float const_one = 1.0;
+
+
+ if (component == SWIZZLE_ZERO)
+ src_ref = get_const_ref(c, &const_zero);
+ else if (component == SWIZZLE_ONE)
+ src_ref = get_const_ref(c, &const_one);
+ else
+ src_ref = pass0_get_reg(c, src.File, src.Index, component);
+
+ return src_ref;
+}
+
+
+static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
+ struct prog_src_register src,
+ unsigned i,
+ struct brw_wm_instruction *insn)
+{
+ const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i);
+ struct brw_wm_ref *newref = get_ref(c);
+
+ newref->value = ref->value;
+ newref->hw_reg = ref->hw_reg;
+
+ if (insn) {
+ newref->insn = insn - c->instruction;
+ newref->prevuse = newref->value->lastuse;
+ newref->value->lastuse = newref;
+ }
+
+ if (src.NegateBase & (1<<i))
+ newref->hw_reg.negate ^= 1;
+
+ if (src.Abs) {
+ newref->hw_reg.negate = 0;
+ newref->hw_reg.abs = 1;
+ }
+
+ return newref;
+}
+
+
+
+static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct brw_wm_instruction *out = get_instruction(c);
+ unsigned writemask = inst->DstReg.WriteMask;
+ unsigned nr_args = brw_wm_nr_args(inst->Opcode);
+ unsigned i, j;
+
+ /* Copy some data out of the instruction
+ */
+ out->opcode = inst->Opcode;
+ out->saturate = (inst->SaturateMode != SATURATE_OFF);
+ out->tex_unit = inst->TexSrcUnit;
+ out->tex_idx = inst->TexSrcTarget;
+
+ /* Args:
+ */
+ for (i = 0; i < nr_args; i++) {
+ for (j = 0; j < 4; j++) {
+ out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out);
+ }
+ }
+
+ /* Dst:
+ */
+ if (brw_wm_is_scalar_result(out->opcode))
+ pass0_set_dst_scalar(c, out, inst, writemask);
+ else
+ pass0_set_dst(c, out, inst, writemask);
+
+ return out;
+}
+
+
+
+/***********************************************************************
+ * Optimize moves and swizzles away:
+ */
+static void pass0_precalc_mov( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ const struct prog_dst_register *dst = &inst->DstReg;
+ unsigned writemask = inst->DstReg.WriteMask;
+ unsigned i;
+
+ /* Get the effect of a MOV by manipulating our register table:
+ */
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ pass0_set_fpreg_ref( c, dst->File, dst->Index, i,
+ get_new_ref(c, inst->SrcReg[0], i, NULL));
+ }
+ }
+}
+
+
+/* Initialize payload "registers".
+ */
+static void pass0_init_payload( struct brw_wm_compile *c )
+{
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ unsigned j = i >= c->key.nr_depth_regs ? 0 : i;
+ pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i,
+ &c->payload.depth[j] );
+ }
+
+#if 0
+ /* This seems to be an alternative to the INTERP_WPOS stuff I do
+ * elsewhere:
+ */
+ if (c->key.source_depth_reg)
+ pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2,
+ &c->payload.depth[c->key.source_depth_reg/2]);
+#endif
+
+ for (i = 0; i < FRAG_ATTRIB_MAX; i++)
+ pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0,
+ &c->payload.input_interp[i] );
+}
+
+/***********************************************************************
+ * PASS 0
+ *
+ * Work forwards to give each calculated value a unique number. Where
+ * an instruction produces duplicate values (eg DP3), all are given
+ * the same number.
+ *
+ * Translate away swizzling and eliminate non-saturating moves.
+ */
+void brw_wm_pass0( struct brw_wm_compile *c )
+{
+ unsigned insn;
+
+ c->nr_vreg = 0;
+ c->nr_insns = 0;
+
+ pass0_init_undef(c);
+ pass0_init_payload(c);
+
+ for (insn = 0; insn < c->nr_fp_insns; insn++) {
+ const struct prog_instruction *inst = &c->prog_instructions[insn];
+
+
+ /* Optimize away moves, otherwise emit translated instruction:
+ */
+ switch (inst->Opcode) {
+ case OPCODE_MOV:
+ case OPCODE_SWZ:
+ if (!inst->SaturateMode) {
+ pass0_precalc_mov(c, inst);
+ }
+ else {
+ translate_insn(c, inst);
+ }
+ break;
+
+
+ default:
+ translate_insn(c, inst);
+ break;
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass0");
+ }
+}
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_pass1.c b/src/mesa/pipe/i965simple/brw_wm_pass1.c
new file mode 100644
index 0000000000..e6736bd6df
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_pass1.c
@@ -0,0 +1,277 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+#if 0
+static unsigned get_tracked_mask(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst)
+{
+ unsigned i;
+ for (i = 0; i < 4; i++) {
+ if (inst->writemask & (1<<i)) {
+ if (!inst->dst[i]->contributes_to_output) {
+ inst->writemask &= ~(1<<i);
+ inst->dst[i] = 0;
+ }
+ }
+ }
+
+ return inst->writemask;
+}
+
+/* Remove a reference from a value's usage chain.
+ */
+static void unlink_ref(struct brw_wm_ref *ref)
+{
+ struct brw_wm_value *value = ref->value;
+
+ if (ref == value->lastuse) {
+ value->lastuse = ref->prevuse;
+ } else {
+ struct brw_wm_ref *i = value->lastuse;
+ while (i->prevuse != ref) i = i->prevuse;
+ i->prevuse = ref->prevuse;
+ }
+}
+
+static void track_arg(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst,
+ unsigned arg,
+ unsigned readmask)
+{
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ struct brw_wm_ref *ref = inst->src[arg][i];
+ if (ref) {
+ if (readmask & (1<<i))
+ ref->value->contributes_to_output = 1;
+ else {
+ unlink_ref(ref);
+ inst->src[arg][i] = NULL;
+ }
+ }
+ }
+}
+
+static unsigned get_texcoord_mask( unsigned tex_idx )
+{
+ switch (tex_idx) {
+ case TEXTURE_1D_INDEX: return WRITEMASK_X;
+ case TEXTURE_2D_INDEX: return WRITEMASK_XY;
+ case TEXTURE_3D_INDEX: return WRITEMASK_XYZ;
+ case TEXTURE_CUBE_INDEX: return WRITEMASK_XYZ;
+ case TEXTURE_RECT_INDEX: return WRITEMASK_XY;
+ default: return 0;
+ }
+}
+
+/* Step two: Basically this is dead code elimination.
+ *
+ * Iterate backwards over instructions, noting which values
+ * contribute to the final result. Adjust writemasks to only
+ * calculate these values.
+ */
+void brw_wm_pass1( struct brw_wm_compile *c )
+{
+ int insn;
+
+ for (insn = c->nr_insns-1; insn >= 0; insn--) {
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+ unsigned writemask;
+ unsigned read0, read1, read2;
+
+ if (inst->opcode == OPCODE_KIL) {
+ track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */
+ continue;
+ }
+
+ if (inst->opcode == WM_FB_WRITE) {
+ track_arg(c, inst, 0, WRITEMASK_XYZW);
+ track_arg(c, inst, 1, WRITEMASK_XYZW);
+ if (c->key.source_depth_to_render_target &&
+ c->key.computes_depth)
+ track_arg(c, inst, 2, WRITEMASK_Z);
+ else
+ track_arg(c, inst, 2, 0);
+ continue;
+ }
+
+ /* Lookup all the registers which were written by this
+ * instruction and get a mask of those that contribute to the output:
+ */
+ writemask = get_tracked_mask(c, inst);
+ if (!writemask) {
+ unsigned arg;
+ for (arg = 0; arg < 3; arg++)
+ track_arg(c, inst, arg, 0);
+ continue;
+ }
+
+ read0 = 0;
+ read1 = 0;
+ read2 = 0;
+
+ /* Mark all inputs which contribute to the marked outputs:
+ */
+ switch (inst->opcode) {
+ case OPCODE_ABS:
+ case OPCODE_FLR:
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ read0 = writemask;
+ break;
+
+ case OPCODE_SUB:
+ case OPCODE_SLT:
+ case OPCODE_SLE:
+ case OPCODE_SGE:
+ case OPCODE_SGT:
+ case OPCODE_SEQ:
+ case OPCODE_SNE:
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ read0 = writemask;
+ read1 = writemask;
+ break;
+
+ case OPCODE_MAD:
+ case OPCODE_CMP:
+ case OPCODE_LRP:
+ read0 = writemask;
+ read1 = writemask;
+ read2 = writemask;
+ break;
+
+ case OPCODE_XPD:
+ if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;
+ if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;
+ if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;
+ read1 = read0;
+ break;
+
+ case OPCODE_COS:
+ case OPCODE_EX2:
+ case OPCODE_LG2:
+ case OPCODE_RCP:
+ case OPCODE_RSQ:
+ case OPCODE_SIN:
+ case OPCODE_SCS:
+ case WM_CINTERP:
+ case WM_PIXELXY:
+ read0 = WRITEMASK_X;
+ break;
+
+ case OPCODE_POW:
+ read0 = WRITEMASK_X;
+ read1 = WRITEMASK_X;
+ break;
+
+ case OPCODE_TEX:
+ read0 = get_texcoord_mask(inst->tex_idx);
+
+ if (c->key.shadowtex_mask & (1<<inst->tex_unit))
+ read0 |= WRITEMASK_Z;
+ break;
+
+ case OPCODE_TXB:
+ /* Shadow ignored for txb.
+ */
+ read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W;
+ break;
+
+ case WM_WPOSXY:
+ read0 = writemask & WRITEMASK_XY;
+ break;
+
+ case WM_DELTAXY:
+ read0 = writemask & WRITEMASK_XY;
+ read1 = WRITEMASK_X;
+ break;
+
+ case WM_PIXELW:
+ read0 = WRITEMASK_X;
+ read1 = WRITEMASK_XY;
+ break;
+
+ case WM_LINTERP:
+ read0 = WRITEMASK_X;
+ read1 = WRITEMASK_XY;
+ break;
+
+ case WM_PINTERP:
+ read0 = WRITEMASK_X; /* interpolant */
+ read1 = WRITEMASK_XY; /* deltas */
+ read2 = WRITEMASK_W; /* pixel w */
+ break;
+
+ case OPCODE_DP3:
+ read0 = WRITEMASK_XYZ;
+ read1 = WRITEMASK_XYZ;
+ break;
+
+ case OPCODE_DPH:
+ read0 = WRITEMASK_XYZ;
+ read1 = WRITEMASK_XYZW;
+ break;
+
+ case OPCODE_DP4:
+ read0 = WRITEMASK_XYZW;
+ read1 = WRITEMASK_XYZW;
+ break;
+
+ case OPCODE_LIT:
+ read0 = WRITEMASK_XYW;
+ break;
+
+ case OPCODE_SWZ:
+ case OPCODE_DST:
+ case OPCODE_TXP:
+ default:
+ break;
+ }
+
+ track_arg(c, inst, 0, read0);
+ track_arg(c, inst, 1, read1);
+ track_arg(c, inst, 2, read2);
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass1");
+ }
+}
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_pass2.c b/src/mesa/pipe/i965simple/brw_wm_pass2.c
new file mode 100644
index 0000000000..b1b404706f
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_pass2.c
@@ -0,0 +1,335 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+#if 0
+/* Use these to force spilling so that that functionality can be
+ * tested with known-good examples rather than having to construct new
+ * tests.
+ */
+#define TEST_PAYLOAD_SPILLS 0
+#define TEST_DST_SPILLS 0
+
+static void spill_value(struct brw_wm_compile *c,
+ struct brw_wm_value *value);
+
+static void prealloc_reg(struct brw_wm_compile *c,
+ struct brw_wm_value *value,
+ unsigned reg)
+{
+ if (value->lastuse) {
+ /* Set nextuse to zero, it will be corrected by
+ * update_register_usage().
+ */
+ c->pass2_grf[reg].value = value;
+ c->pass2_grf[reg].nextuse = 0;
+
+ value->resident = &c->pass2_grf[reg];
+ value->hw_reg = brw_vec8_grf(reg*2, 0);
+
+ if (TEST_PAYLOAD_SPILLS)
+ spill_value(c, value);
+ }
+}
+
+
+/* Initialize all the register values. Do the initial setup
+ * calculations for interpolants.
+ */
+static void init_registers( struct brw_wm_compile *c )
+{
+ unsigned inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+ unsigned nr_interp_regs = 0;
+ unsigned i = 0;
+ unsigned j;
+
+ for (j = 0; j < c->grf_limit; j++)
+ c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
+
+ for (j = 0; j < c->key.nr_depth_regs; j++)
+ prealloc_reg(c, &c->payload.depth[j], i++);
+
+ for (j = 0; j < c->nr_creg; j++)
+ prealloc_reg(c, &c->creg[j], i++);
+
+ for (j = 0; j < FRAG_ATTRIB_MAX; j++)
+ if (inputs & (1<<j)) {
+ nr_interp_regs++;
+ prealloc_reg(c, &c->payload.input_interp[j], i++);
+ }
+
+ assert(nr_interp_regs >= 1);
+
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = nr_interp_regs * 2;
+ c->prog_data.curb_read_length = c->nr_creg * 2;
+
+ c->max_wm_grf = i * 2;
+}
+
+
+/* Update the nextuse value for each register in our file.
+ */
+static void update_register_usage(struct brw_wm_compile *c,
+ unsigned thisinsn)
+{
+ unsigned i;
+
+ for (i = 1; i < c->grf_limit; i++) {
+ struct brw_wm_grf *grf = &c->pass2_grf[i];
+
+ /* Only search those which can change:
+ */
+ if (grf->nextuse < thisinsn) {
+ struct brw_wm_ref *ref = grf->value->lastuse;
+
+ /* Has last use of value been passed?
+ */
+ if (ref->insn < thisinsn) {
+ grf->value->resident = 0;
+ grf->value = 0;
+ grf->nextuse = BRW_WM_MAX_INSN;
+ }
+ else {
+ /* Else loop through chain to update:
+ */
+ while (ref->prevuse && ref->prevuse->insn >= thisinsn)
+ ref = ref->prevuse;
+
+ grf->nextuse = ref->insn;
+ }
+ }
+ }
+}
+
+
+static void spill_value(struct brw_wm_compile *c,
+ struct brw_wm_value *value)
+{
+ /* Allocate a spill slot. Note that allocations start from 0x40 -
+ * the first slot is reserved to mean "undef" in brw_wm_emit.c
+ */
+ if (!value->spill_slot) {
+ c->last_scratch += 0x40;
+ value->spill_slot = c->last_scratch;
+ }
+
+ /* The spill will be done in brw_wm_emit.c immediately after the
+ * value is calculated, so we can just take this reg without any
+ * further work.
+ */
+ value->resident->value = NULL;
+ value->resident->nextuse = BRW_WM_MAX_INSN;
+ value->resident = NULL;
+}
+
+
+
+/* Search for contiguous region with the most distant nearest
+ * member. Free regs count as very distant.
+ *
+ * TODO: implement spill-to-reg so that we can rearrange discontigous
+ * free regs and then spill the oldest non-free regs in sequence.
+ * This would mean inserting instructions in this pass.
+ */
+static unsigned search_contiguous_regs(struct brw_wm_compile *c,
+ unsigned nr,
+ unsigned thisinsn)
+{
+ struct brw_wm_grf *grf = c->pass2_grf;
+ unsigned furthest = 0;
+ unsigned reg = 0;
+ unsigned i, j;
+
+ /* Start search at 1: r0 is special and can't be used or spilled.
+ */
+ for (i = 1; i < c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) {
+ unsigned group_nextuse = BRW_WM_MAX_INSN;
+
+ for (j = 0; j < nr; j++) {
+ if (grf[i+j].nextuse < group_nextuse)
+ group_nextuse = grf[i+j].nextuse;
+ }
+
+ if (group_nextuse > furthest) {
+ furthest = group_nextuse;
+ reg = i;
+ }
+ }
+
+ assert(furthest != thisinsn);
+
+ /* Any non-empty regs will need to be spilled:
+ */
+ for (j = 0; j < nr; j++)
+ if (grf[reg+j].value)
+ spill_value(c, grf[reg+j].value);
+
+ return reg;
+}
+
+
+static void alloc_contiguous_dest(struct brw_wm_compile *c,
+ struct brw_wm_value *dst[],
+ unsigned nr,
+ unsigned thisinsn)
+{
+ unsigned reg = search_contiguous_regs(c, nr, thisinsn);
+ unsigned i;
+
+ for (i = 0; i < nr; i++) {
+ if (!dst[i]) {
+ /* Need to grab a dummy value in TEX case. Don't introduce
+ * it into the tracking scheme.
+ */
+ dst[i] = &c->vreg[c->nr_vreg++];
+ }
+ else {
+ assert(!dst[i]->resident);
+ assert(c->pass2_grf[reg+i].nextuse != thisinsn);
+
+ c->pass2_grf[reg+i].value = dst[i];
+ c->pass2_grf[reg+i].nextuse = thisinsn;
+
+ dst[i]->resident = &c->pass2_grf[reg+i];
+ }
+
+ dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0);
+ }
+
+ if ((reg+nr)*2 > c->max_wm_grf)
+ c->max_wm_grf = (reg+nr) * 2;
+}
+
+
+static void load_args(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst)
+{
+ unsigned thisinsn = inst - c->instruction;
+ unsigned i,j;
+
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 4; j++) {
+ struct brw_wm_ref *ref = inst->src[i][j];
+
+ if (ref) {
+ if (!ref->value->resident) {
+ /* Need to bring the value in from scratch space. The code for
+ * this will be done in brw_wm_emit.c, here we just do the
+ * register allocation and mark the ref as requiring a fill.
+ */
+ unsigned reg = search_contiguous_regs(c, 1, thisinsn);
+
+ c->pass2_grf[reg].value = ref->value;
+ c->pass2_grf[reg].nextuse = thisinsn;
+
+ ref->value->resident = &c->pass2_grf[reg];
+
+ /* Note that a fill is required:
+ */
+ ref->unspill_reg = reg*2;
+ }
+
+ /* Adjust the hw_reg to point at the value's current location:
+ */
+ assert(ref->value == ref->value->resident->value);
+ ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2;
+ }
+ }
+ }
+}
+
+
+
+/* Step 3: Work forwards once again. Perform register allocations,
+ * taking into account instructions like TEX which require contiguous
+ * result registers. Where necessary spill registers to scratch space
+ * and reload later.
+ */
+void brw_wm_pass2( struct brw_wm_compile *c )
+{
+ unsigned insn;
+ unsigned i;
+
+ init_registers(c);
+
+ for (insn = 0; insn < c->nr_insns; insn++) {
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+
+ /* Update registers' nextuse values:
+ */
+ update_register_usage(c, insn);
+
+ /* May need to unspill some args.
+ */
+ load_args(c, inst);
+
+ /* Allocate registers to hold results:
+ */
+ switch (inst->opcode) {
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ case OPCODE_TXP:
+ alloc_contiguous_dest(c, inst->dst, 4, insn);
+ break;
+
+ default:
+ for (i = 0; i < 4; i++) {
+ if (inst->writemask & (1<<i)) {
+ assert(inst->dst[i]);
+ alloc_contiguous_dest(c, &inst->dst[i], 1, insn);
+ }
+ }
+ break;
+ }
+
+ if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY)
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i])
+ spill_value(c, inst->dst[i]);
+
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass2");
+ }
+
+ c->state = PASS2_DONE;
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass2/done");
+ }
+}
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c
new file mode 100644
index 0000000000..4ea0dd7db0
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c
@@ -0,0 +1,275 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#include "pipe/p_util.h"
+
+
+#define COMPAREFUNC_ALWAYS 0
+#define COMPAREFUNC_NEVER 0x1
+#define COMPAREFUNC_LESS 0x2
+#define COMPAREFUNC_EQUAL 0x3
+#define COMPAREFUNC_LEQUAL 0x4
+#define COMPAREFUNC_GREATER 0x5
+#define COMPAREFUNC_NOTEQUAL 0x6
+#define COMPAREFUNC_GEQUAL 0x7
+
+/* Samplers aren't strictly wm state from the hardware's perspective,
+ * but that is the only situation in which we use them in this driver.
+ */
+
+static int intel_translate_shadow_compare_func(unsigned func)
+{
+ switch(func) {
+ case PIPE_FUNC_NEVER:
+ return COMPAREFUNC_ALWAYS;
+ case PIPE_FUNC_LESS:
+ return COMPAREFUNC_LEQUAL;
+ case PIPE_FUNC_LEQUAL:
+ return COMPAREFUNC_LESS;
+ case PIPE_FUNC_GREATER:
+ return COMPAREFUNC_GEQUAL;
+ case PIPE_FUNC_GEQUAL:
+ return COMPAREFUNC_GREATER;
+ case PIPE_FUNC_NOTEQUAL:
+ return COMPAREFUNC_EQUAL;
+ case PIPE_FUNC_EQUAL:
+ return COMPAREFUNC_NOTEQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return COMPAREFUNC_NEVER;
+ }
+
+ fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+ return COMPAREFUNC_NEVER;
+}
+
+/* The brw (and related graphics cores) do not support GL_CLAMP. The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static unsigned translate_wrap_mode( int wrap )
+{
+ switch( wrap ) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return BRW_TEXCOORDMODE_WRAP;
+ case PIPE_TEX_WRAP_CLAMP:
+ return BRW_TEXCOORDMODE_CLAMP;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return BRW_TEXCOORDMODE_CLAMP_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return BRW_TEXCOORDMODE_MIRROR;
+ default:
+ return BRW_TEXCOORDMODE_WRAP;
+ }
+}
+
+
+static unsigned U_FIXED(float value, unsigned frac_bits)
+{
+ value *= (1<<frac_bits);
+ return value < 0 ? 0 : value;
+}
+
+static int S_FIXED(float value, unsigned frac_bits)
+{
+ return value * (1<<frac_bits);
+}
+
+
+static unsigned upload_default_color( struct brw_context *brw,
+ const float *color )
+{
+ struct brw_sampler_default_color sdc;
+
+ COPY_4V(sdc.color, color);
+
+ return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc );
+}
+
+
+/*
+ */
+static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_sampler,
+ unsigned sdc_gs_offset,
+ struct brw_sampler_state *sampler)
+{
+ memset(sampler, 0, sizeof(*sampler));
+
+ switch (pipe_sampler->min_mip_filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ break;
+ default:
+ break;
+ }
+
+ switch (pipe_sampler->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+ break;
+ default:
+ break;
+ }
+ /* Set Anisotropy:
+ */
+ if (pipe_sampler->max_anisotropy > 1.0) {
+ sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
+ sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+
+ if (pipe_sampler->max_anisotropy > 2.0) {
+ sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2,
+ BRW_ANISORATIO_16);
+ }
+ }
+ else {
+ switch (pipe_sampler->mag_img_filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ break;
+ default:
+ break;
+ }
+ }
+
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s);
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_r);
+ sampler->ss1.t_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_t);
+
+ /* Fulsim complains if I don't do this. Hardware doesn't mind:
+ */
+#if 0
+ if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) {
+ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ }
+#endif
+
+ /* Set shadow function:
+ */
+ if (pipe_sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ /* Shadowing is "enabled" by emitting a particular sampler
+ * message (sample_c). So need to recompile WM program when
+ * shadow comparison is enabled on each/any texture unit.
+ */
+ sampler->ss0.shadow_function = intel_translate_shadow_compare_func(pipe_sampler->compare_func);
+ }
+
+ /* Set LOD bias:
+ */
+ sampler->ss0.lod_bias = S_FIXED(CLAMP(pipe_sampler->lod_bias, -16, 15), 6);
+
+ sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+ /* Set BaseMipLevel, MaxLOD, MinLOD:
+ *
+ * XXX: I don't think that using firstLevel, lastLevel works,
+ * because we always setup the surface state as if firstLevel ==
+ * level zero. Probably have to subtract firstLevel from each of
+ * these:
+ */
+ sampler->ss0.base_level = U_FIXED(0, 1);
+
+ sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(pipe_sampler->max_lod, 0), 13), 6);
+ sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(pipe_sampler->min_lod, 0), 13), 6);
+
+ sampler->ss2.default_color_pointer = sdc_gs_offset >> 5;
+}
+
+
+
+/* All samplers must be uploaded in a single contiguous array, which
+ * complicates various things. However, this is still too confusing -
+ * FIXME: simplify all the different new texture state flags.
+ */
+void brw_upload_wm_samplers(struct brw_context *brw)
+{
+ unsigned unit;
+ unsigned sampler_count = 0;
+
+ /* _NEW_TEXTURE */
+ for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+ if (brw->attribs.Samplers[unit]) { /* FIXME: correctly detect enabled ones */
+ const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit];
+ unsigned sdc_gs_offset = upload_default_color(brw, sampler->border_color);
+
+ brw_update_sampler_state(sampler,
+ sdc_gs_offset,
+ &brw->wm.sampler[unit]);
+
+ sampler_count = unit + 1;
+ }
+ }
+
+ if (brw->wm.sampler_count != sampler_count) {
+ brw->wm.sampler_count = sampler_count;
+ brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+ }
+
+ brw->wm.sampler_gs_offset = 0;
+
+ if (brw->wm.sampler_count)
+ brw->wm.sampler_gs_offset =
+ brw_cache_data_sz(&brw->cache[BRW_SAMPLER],
+ brw->wm.sampler,
+ sizeof(struct brw_sampler_state) * brw->wm.sampler_count);
+}
+
+#if 0
+const struct brw_tracked_state brw_wm_samplers = {
+ .dirty = {
+ .mesa = _NEW_TEXTURE,
+ .brw = 0,
+ .cache = 0
+ },
+ .update = upload_wm_samplers
+};
+#endif
+
diff --git a/src/mesa/pipe/i965simple/brw_wm_state.c b/src/mesa/pipe/i965simple/brw_wm_state.c
new file mode 100644
index 0000000000..b45fb2f56b
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_state.c
@@ -0,0 +1,202 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_wm.h"
+
+/***********************************************************************
+ * WM unit - fragment programs and rasterization
+ */
+
+#if 0
+static void upload_wm_unit(struct brw_context *brw )
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_wm_unit_state wm;
+ unsigned max_threads;
+ unsigned per_thread;
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ max_threads = 0;
+ else
+ max_threads = 31;
+
+
+ memset(&wm, 0, sizeof(wm));
+
+ /* CACHE_NEW_WM_PROG */
+ wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
+ wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6;
+ wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+ wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+ wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length;
+
+ wm.wm5.max_threads = max_threads;
+
+ per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+ assert(per_thread <= 12 * 1024);
+
+ if (brw->wm.prog_data->total_scratch) {
+ unsigned total = per_thread * (max_threads + 1);
+
+ /* Scratch space -- just have to make sure there is sufficient
+ * allocated for the active program and current number of threads.
+ */
+ brw->wm.scratch_buffer_size = total;
+ if (brw->wm.scratch_buffer &&
+ brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) {
+ dri_bo_unreference(brw->wm.scratch_buffer);
+ brw->wm.scratch_buffer = NULL;
+ }
+ if (!brw->wm.scratch_buffer) {
+ brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr,
+ "wm scratch",
+ brw->wm.scratch_buffer_size,
+ 4096, DRM_BO_FLAG_MEM_TT);
+ }
+ }
+ /* XXX: Scratch buffers are not implemented correectly.
+ *
+ * The scratch offset to be programmed into wm is relative to the general
+ * state base address. However, using dri_bo_alloc/dri_bo_emit_reloc (or
+ * the previous bmGenBuffers scheme), we get an offset relative to the
+ * start of framebuffer. Even before then, it was broken in other ways,
+ * so just fail for now if we hit that path.
+ */
+ assert(brw->wm.prog_data->total_scratch == 0);
+
+ /* CACHE_NEW_SURFACE */
+ wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
+
+ wm.thread3.urb_entry_read_offset = 0;
+ wm.thread1.depth_coef_urb_read_offset = 1;
+ wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+ /* CACHE_NEW_SAMPLER */
+ wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
+ wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ {
+ const struct gl_fragment_program *fp = brw->fragment_program;
+
+ if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS))
+ wm.wm5.program_uses_depth = 1; /* as far as we can tell */
+
+ if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))
+ wm.wm5.program_computes_depth = 1;
+
+ /* _NEW_COLOR */
+ if (fp->UsesKill ||
+ brw->attribs.Color->AlphaEnabled)
+ wm.wm5.program_uses_killpixel = 1;
+
+ if (brw_wm_is_glsl(fp))
+ wm.wm5.enable_8_pix = 1;
+ else
+ wm.wm5.enable_16_pix = 1;
+ }
+
+ wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
+ wm.wm5.legacy_line_rast = 0;
+ wm.wm5.legacy_global_depth_bias = 0;
+ wm.wm5.early_depth_test = 1; /* never need to disable */
+ wm.wm5.line_aa_region_width = 0;
+ wm.wm5.line_endcap_aa_region_width = 1;
+
+ /* _NEW_POLYGONSTIPPLE */
+ if (brw->attribs.Polygon->StippleFlag)
+ wm.wm5.polygon_stipple = 1;
+
+ /* _NEW_POLYGON */
+ if (brw->attribs.Polygon->OffsetFill) {
+ wm.wm5.depth_offset = 1;
+ /* Something wierd going on with legacy_global_depth_bias,
+ * offset_constant, scaling and MRD. This value passes glean
+ * but gives some odd results elsewere (eg. the
+ * quad-offset-units test).
+ */
+ wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2;
+
+ /* This is the only value that passes glean:
+ */
+ wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor;
+ }
+
+ /* _NEW_LINE */
+ if (brw->attribs.Line->StippleFlag) {
+ wm.wm5.line_stipple = 1;
+ }
+
+ if (BRW_DEBUG & DEBUG_STATS || intel->stats_wm)
+ wm.wm4.stats_enable = 1;
+
+ brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );
+
+ if (brw->wm.prog_data->total_scratch) {
+ /*
+ dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer,
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
+ (per_thread / 1024) - 1,
+ brw->wm.state_gs_offset +
+ ((char *)&wm.thread2 - (char *)&wm),
+ brw->wm.scratch_buffer);
+ */
+ } else {
+ wm.thread2.scratch_space_base_pointer = 0;
+ }
+}
+
+const struct brw_tracked_state brw_wm_unit = {
+ .dirty = {
+ .mesa = (_NEW_POLYGON |
+ _NEW_POLYGONSTIPPLE |
+ _NEW_LINE |
+ _NEW_COLOR),
+
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_LOCK),
+
+ .cache = (CACHE_NEW_SURFACE |
+ CACHE_NEW_WM_PROG |
+ CACHE_NEW_SAMPLER)
+ },
+ .update = upload_wm_unit
+};
+
+#endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_surface_state.c b/src/mesa/pipe/i965simple/brw_wm_surface_state.c
new file mode 100644
index 0000000000..c04fd57494
--- /dev/null
+++ b/src/mesa/pipe/i965simple/brw_wm_surface_state.c
@@ -0,0 +1,294 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+#if 0
+static unsigned translate_tex_target( int target )
+{
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ return BRW_SURFACE_1D;
+
+ case PIPE_TEXTURE_2D:
+ return BRW_SURFACE_2D;
+
+ case PIPE_TEXTURE_3D:
+ return BRW_SURFACE_3D;
+
+ case PIPE_TEXTURE_CUBE:
+ return BRW_SURFACE_CUBE;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static unsigned translate_tex_format( unsigned mesa_format )
+{
+ switch( mesa_format ) {
+ case PIPE_FORMAT_U_L8:
+ return BRW_SURFACEFORMAT_L8_UNORM;
+
+ case PIPE_FORMAT_U_I8:
+ return BRW_SURFACEFORMAT_I8_UNORM;
+
+ case PIPE_FORMAT_U_A8:
+ return BRW_SURFACEFORMAT_A8_UNORM;
+
+ case PIPE_FORMAT_U_A8_L8:
+ return BRW_SURFACEFORMAT_L8A8_UNORM;
+
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ assert(0); /* not supported for sampling */
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+
+ case PIPE_FORMAT_U_A8_R8_G8_B8:
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ case PIPE_FORMAT_RGBA8888_REV:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_U_R5_G6_B5:
+ return BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+
+ case PIPE_FORMAT_YCBCR_REV:
+ return BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+ case PIPE_FORMAT_YCBCR:
+ return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
+
+ case PIPE_FORMAT_RGB_FXT1:
+ case PIPE_FORMAT_RGBA_FXT1:
+ return BRW_SURFACEFORMAT_FXT1;
+
+ case PIPE_FORMAT_Z16_UNORM:
+ return BRW_SURFACEFORMAT_I16_UNORM;
+
+ case PIPE_FORMAT_RGB_DXT1:
+ return BRW_SURFACEFORMAT_DXT1_RGB;
+
+ case PIPE_FORMAT_RGBA_DXT1:
+ return BRW_SURFACEFORMAT_BC1_UNORM;
+
+ case PIPE_FORMAT_RGBA_DXT3:
+ return BRW_SURFACEFORMAT_BC2_UNORM;
+
+ case PIPE_FORMAT_RGBA_DXT5:
+ return BRW_SURFACEFORMAT_BC3_UNORM;
+
+ case PIPE_FORMAT_SRGBA8:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB;
+ case PIPE_FORMAT_SRGB_DXT1:
+ return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static
+void brw_update_texture_surface( GLcontext *ctx,
+ unsigned unit )
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current;
+ struct intel_texture_object *intelObj = intel_texture_object(tObj);
+ struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+ struct brw_surface_state surf;
+
+ memset(&surf, 0, sizeof(surf));
+
+ surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ surf.ss0.surface_type = translate_tex_target(tObj->Target);
+ surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat);
+
+ /* This is ok for all textures with channel width 8bit or less:
+ */
+/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+
+ /* Updated in emit_reloc */
+ surf.ss1.base_addr = intelObj->mt->region->buffer->offset;
+
+ surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
+ surf.ss2.width = firstImage->Width - 1;
+ surf.ss2.height = firstImage->Height - 1;
+
+ surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ surf.ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */
+ surf.ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1;
+ surf.ss3.depth = firstImage->Depth - 1;
+
+ surf.ss4.min_lod = 0;
+
+ if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+ surf.ss0.cube_pos_x = 1;
+ surf.ss0.cube_pos_y = 1;
+ surf.ss0.cube_pos_z = 1;
+ surf.ss0.cube_neg_x = 1;
+ surf.ss0.cube_neg_y = 1;
+ surf.ss0.cube_neg_z = 1;
+ }
+
+ brw->wm.bind.surf_ss_offset[unit + 1] =
+ brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+}
+
+
+
+#define OFFSET(TYPE, FIELD) ( (unsigned)&(((TYPE *)0)->FIELD) )
+
+
+static void upload_wm_surfaces(struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ unsigned i;
+
+ {
+ struct brw_surface_state surf;
+ struct intel_region *region = brw->state.draw_region;
+
+ memset(&surf, 0, sizeof(surf));
+
+ if (region != NULL) {
+ if (region->cpp == 4)
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ else
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+ surf.ss0.surface_type = BRW_SURFACE_2D;
+
+ surf.ss2.width = region->pitch - 1; /* XXX: not really! */
+ surf.ss2.height = region->height - 1;
+ surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ surf.ss3.tiled_surface = region->tiled;
+ surf.ss3.pitch = (region->pitch * region->cpp) - 1;
+ } else {
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ surf.ss0.surface_type = BRW_SURFACE_NULL;
+ }
+
+ /* _NEW_COLOR */
+ surf.ss0.color_blend = (!brw->attribs.Color->_LogicOpEnabled &&
+ brw->attribs.Color->BlendEnabled);
+
+
+ surf.ss0.writedisable_red = !brw->attribs.Color->ColorMask[0];
+ surf.ss0.writedisable_green = !brw->attribs.Color->ColorMask[1];
+ surf.ss0.writedisable_blue = !brw->attribs.Color->ColorMask[2];
+ surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3];
+
+ brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+
+ brw->wm.nr_surfaces = 1;
+ }
+
+
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i];
+
+ /* _NEW_TEXTURE, BRW_NEW_TEXDATA
+ */
+ if (texUnit->_ReallyEnabled &&
+ intel_finalize_mipmap_tree(intel, i)) {
+
+ brw_update_texture_surface(ctx, i);
+
+ brw->wm.nr_surfaces = i+2;
+ }
+ else if( texUnit->_ReallyEnabled &&
+ texUnit->_Current == intel->frame_buffer_texobj )
+ {
+ brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0];
+ brw->wm.nr_surfaces = i+2;
+ }
+ else {
+ brw->wm.bind.surf_ss_offset[i+1] = 0;
+ }
+ }
+
+ brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND],
+ &brw->wm.bind );
+}
+
+static void emit_reloc_wm_surfaces(struct brw_context *brw)
+{
+ int unit;
+
+ if (brw->state.draw_region != NULL) {
+ /* Emit framebuffer relocation */
+ dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE),
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
+ 0,
+ brw->wm.bind.surf_ss_offset[0] +
+ offsetof(struct brw_surface_state, ss1),
+ brw->state.draw_region->buffer);
+ }
+
+ /* Emit relocations for texture buffers */
+ for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+ struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit];
+ struct gl_texture_object *tObj = texUnit->_Current;
+ struct intel_texture_object *intelObj = intel_texture_object(tObj);
+
+ if (texUnit->_ReallyEnabled && intelObj->mt != NULL) {
+ dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE),
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ 0,
+ brw->wm.bind.surf_ss_offset[unit + 1] +
+ offsetof(struct brw_surface_state, ss1),
+ intelObj->mt->region->buffer);
+ }
+ }
+}
+
+const struct brw_tracked_state brw_wm_surfaces = {
+ .dirty = {
+ .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .update = upload_wm_surfaces,
+ .emit_reloc = emit_reloc_wm_surfaces,
+};
+#endif
diff --git a/src/mesa/pipe/llvm/gallivm.cpp b/src/mesa/pipe/llvm/gallivm.cpp
index bd8bfac208..49bbf753c4 100644
--- a/src/mesa/pipe/llvm/gallivm.cpp
+++ b/src/mesa/pipe/llvm/gallivm.cpp
@@ -37,12 +37,12 @@
#include "storage.h"
#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
#include "pipe/tgsi/exec/tgsi_exec.h"
-#include "pipe/tgsi/exec/tgsi_token.h"
-#include "pipe/tgsi/exec/tgsi_build.h"
-#include "pipe/tgsi/exec/tgsi_util.h"
-#include "pipe/tgsi/exec/tgsi_parse.h"
-#include "pipe/tgsi/exec/tgsi_dump.h"
+#include "pipe/tgsi/util/tgsi_util.h"
+#include "pipe/tgsi/util/tgsi_build.h"
+#include "pipe/tgsi/util/tgsi_dump.h"
#include <llvm/Module.h>
#include <llvm/CallingConv.h>
@@ -157,6 +157,7 @@ add_interpolator(struct gallivm_prog *prog,
prog->interpolators[prog->num_interp] = *interp;
++prog->num_interp;
}
+
static void
translate_declaration(struct gallivm_prog *prog,
llvm::Module *module,
@@ -432,7 +433,11 @@ translate_instruction(llvm::Module *module,
break;
case TGSI_OPCODE_DDY:
break;
- case TGSI_OPCODE_KILP:
+ case TGSI_OPCODE_KILP: {
+ out = instr->kilp(inputs[0]);
+ storage->setKilElement(out);
+ return;
+ }
break;
case TGSI_OPCODE_PK2H:
break;
@@ -466,8 +471,6 @@ translate_instruction(llvm::Module *module,
break;
case TGSI_OPCODE_TXD:
break;
- case TGSI_OPCODE_TXP:
- break;
case TGSI_OPCODE_UP2H:
break;
case TGSI_OPCODE_UP2US:
@@ -485,11 +488,7 @@ translate_instruction(llvm::Module *module,
case TGSI_OPCODE_BRA:
break;
case TGSI_OPCODE_CAL: {
- instr->cal(inst->InstructionExtLabel.Label,
- storage->outputPtr(),
- storage->inputPtr(),
- storage->constPtr(),
- storage->tempPtr());
+ instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr());
return;
}
break;
@@ -740,14 +739,8 @@ tgsi_to_llvm(struct gallivm_prog *prog, const struct tgsi_token *tokens)
shader->setName(func_name.c_str());
Function::arg_iterator args = shader->arg_begin();
- Value *ptr_OUT = args++;
- ptr_OUT->setName("OUT");
- Value *ptr_IN = args++;
- ptr_IN->setName("IN");
- Value *ptr_CONST = args++;
- ptr_CONST->setName("CONST");
- Value *ptr_TEMPS = args++;
- ptr_TEMPS->setName("TEMPS");
+ Value *ptr_INPUT = args++;
+ ptr_INPUT->setName("input");
BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
@@ -755,7 +748,7 @@ tgsi_to_llvm(struct gallivm_prog *prog, const struct tgsi_token *tokens)
fi = tgsi_default_full_instruction();
fd = tgsi_default_full_declaration();
- Storage storage(label_entry, ptr_OUT, ptr_IN, ptr_CONST, ptr_TEMPS);
+ Storage storage(label_entry, ptr_INPUT);
Instructions instr(mod, shader, label_entry, &storage);
while(!tgsi_parse_end_of_tokens(&parse)) {
tgsi_parse_token(&parse);
@@ -927,25 +920,21 @@ typedef int (*fragment_shader_runner)(float x, float y,
float (*inputs)[16][4],
int num_attribs,
float (*consts)[4], int num_consts,
- struct tgsi_sampler *samplers,
- unsigned *sampler_units);
+ struct tgsi_sampler *samplers);
int gallivm_fragment_shader_exec(struct gallivm_prog *prog,
float fx, float fy,
float (*dests)[16][4],
float (*inputs)[16][4],
float (*consts)[4],
- struct tgsi_sampler *samplers,
- unsigned *sampler_units)
+ struct tgsi_sampler *samplers)
{
fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function);
assert(runner);
- runner(fx, fy, dests, inputs, prog->num_interp,
- consts, prog->num_consts,
- samplers, sampler_units);
-
- return 0;
+ return runner(fx, fy, dests, inputs, prog->num_interp,
+ consts, prog->num_consts,
+ samplers);
}
void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix)
@@ -1025,11 +1014,12 @@ struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog)
llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false);
+ ee->DisableLazyCompilation();
cpu->engine = ee;
llvm::Function *func = func_for_shader(prog);
- prog->function = ee->getPointerToFunctionOrStub(func);
+ prog->function = ee->getPointerToFunction(func);
CPU = cpu;
return cpu;
}
@@ -1047,10 +1037,11 @@ void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog
llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
llvm::ExecutionEngine *ee = cpu->engine;
assert(ee);
+ ee->DisableLazyCompilation();
ee->addModuleProvider(mp);
llvm::Function *func = func_for_shader(prog);
- prog->function = ee->getPointerToFunctionOrStub(func);
+ prog->function = ee->getPointerToFunction(func);
}
void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu)
diff --git a/src/mesa/pipe/llvm/gallivm.h b/src/mesa/pipe/llvm/gallivm.h
index 6a05a55db4..fd9a11e5b6 100644
--- a/src/mesa/pipe/llvm/gallivm.h
+++ b/src/mesa/pipe/llvm/gallivm.h
@@ -67,8 +67,7 @@ int gallivm_fragment_shader_exec(struct gallivm_prog *prog,
float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
float (*consts)[4],
- struct tgsi_sampler *samplers,
- unsigned *sampler_units);
+ struct tgsi_sampler *samplers);
void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
const struct tgsi_interp_coef *coefs);
diff --git a/src/mesa/pipe/llvm/gallivm_builtins.cpp b/src/mesa/pipe/llvm/gallivm_builtins.cpp
index da1e6ae1de..48693ca2ed 100644
--- a/src/mesa/pipe/llvm/gallivm_builtins.cpp
+++ b/src/mesa/pipe/llvm/gallivm_builtins.cpp
@@ -69,6 +69,15 @@ FunctionType* FuncTy_12 = FunctionType::get(
PointerType* PointerTy_11 = PointerType::get(FuncTy_12);
+std::vector<const Type*>FuncTy_13_args;
+FuncTy_13_args.push_back(VectorTy_4);
+ParamAttrsList *FuncTy_13_PAL = 0;
+FunctionType* FuncTy_13 = FunctionType::get(
+ /*Result=*/IntegerType::get(32),
+ /*Params=*/FuncTy_13_args,
+ /*isVarArg=*/false,
+ /*ParamAttrs=*/FuncTy_13_PAL);
+
// Function Declarations
@@ -132,6 +141,12 @@ Function* func_vsin = new Function(
/*Name=*/"vsin", mod);
func_vsin->setCallingConv(CallingConv::C);
+Function* func_kilp = new Function(
+ /*Type=*/FuncTy_13,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"kilp", mod);
+func_kilp->setCallingConv(CallingConv::C);
+
// Global Variable Declarations
@@ -152,64 +167,64 @@ GlobalVariable* gvar_array__str1 = new GlobalVariable(
mod);
// Constant Definitions
-Constant* const_array_13 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true);
-Constant* const_array_14 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true);
-ConstantFP* const_float_15 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f));
-ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f));
-Constant* const_float_17 = Constant::getNullValue(Type::FloatTy);
-Constant* const_int32_18 = Constant::getNullValue(IntegerType::get(32));
-std::vector<Constant*> const_packed_19_elems;
-ConstantFP* const_float_20 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
-const_packed_19_elems.push_back(const_float_20);
-UndefValue* const_float_21 = UndefValue::get(Type::FloatTy);
-const_packed_19_elems.push_back(const_float_21);
-const_packed_19_elems.push_back(const_float_21);
-const_packed_19_elems.push_back(const_float_20);
-Constant* const_packed_19 = ConstantVector::get(VectorTy_4, const_packed_19_elems);
-ConstantInt* const_int32_22 = ConstantInt::get(APInt(32, "1", 10));
-ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "3", 10));
-ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "2", 10));
-std::vector<Constant*> const_packed_25_elems;
-const_packed_25_elems.push_back(const_float_20);
-const_packed_25_elems.push_back(const_float_17);
-const_packed_25_elems.push_back(const_float_17);
-const_packed_25_elems.push_back(const_float_20);
-Constant* const_packed_25 = ConstantVector::get(VectorTy_4, const_packed_25_elems);
-Constant* const_double_26 = Constant::getNullValue(Type::DoubleTy);
-std::vector<Constant*> const_packed_27_elems;
-const_packed_27_elems.push_back(const_int32_18);
-ConstantInt* const_int32_28 = ConstantInt::get(APInt(32, "5", 10));
-const_packed_27_elems.push_back(const_int32_28);
-const_packed_27_elems.push_back(const_int32_24);
-const_packed_27_elems.push_back(const_int32_23);
-Constant* const_packed_27 = ConstantVector::get(VectorTy_7, const_packed_27_elems);
-std::vector<Constant*> const_packed_29_elems;
-const_packed_29_elems.push_back(const_int32_18);
-const_packed_29_elems.push_back(const_int32_22);
-ConstantInt* const_int32_30 = ConstantInt::get(APInt(32, "6", 10));
-const_packed_29_elems.push_back(const_int32_30);
-const_packed_29_elems.push_back(const_int32_23);
-Constant* const_packed_29 = ConstantVector::get(VectorTy_7, const_packed_29_elems);
-std::vector<Constant*> const_packed_31_elems;
-const_packed_31_elems.push_back(const_int32_18);
-const_packed_31_elems.push_back(const_int32_22);
-const_packed_31_elems.push_back(const_int32_24);
-ConstantInt* const_int32_32 = ConstantInt::get(APInt(32, "7", 10));
-const_packed_31_elems.push_back(const_int32_32);
-Constant* const_packed_31 = ConstantVector::get(VectorTy_7, const_packed_31_elems);
-std::vector<Constant*> const_ptr_33_indices;
-const_ptr_33_indices.push_back(const_int32_18);
-const_ptr_33_indices.push_back(const_int32_18);
-Constant* const_ptr_33 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_33_indices[0], const_ptr_33_indices.size() );
-UndefValue* const_packed_34 = UndefValue::get(VectorTy_4);
-std::vector<Constant*> const_ptr_35_indices;
-const_ptr_35_indices.push_back(const_int32_18);
-const_ptr_35_indices.push_back(const_int32_18);
-Constant* const_ptr_35 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_35_indices[0], const_ptr_35_indices.size() );
+Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true);
+Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true);
+ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f));
+ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f));
+Constant* const_float_18 = Constant::getNullValue(Type::FloatTy);
+Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32));
+std::vector<Constant*> const_packed_20_elems;
+ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
+const_packed_20_elems.push_back(const_float_21);
+UndefValue* const_float_22 = UndefValue::get(Type::FloatTy);
+const_packed_20_elems.push_back(const_float_22);
+const_packed_20_elems.push_back(const_float_22);
+const_packed_20_elems.push_back(const_float_21);
+Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems);
+ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10));
+ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10));
+ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10));
+std::vector<Constant*> const_packed_26_elems;
+const_packed_26_elems.push_back(const_float_21);
+const_packed_26_elems.push_back(const_float_18);
+const_packed_26_elems.push_back(const_float_18);
+const_packed_26_elems.push_back(const_float_21);
+Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems);
+Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy);
+std::vector<Constant*> const_packed_28_elems;
+const_packed_28_elems.push_back(const_int32_19);
+ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10));
+const_packed_28_elems.push_back(const_int32_29);
+const_packed_28_elems.push_back(const_int32_25);
+const_packed_28_elems.push_back(const_int32_24);
+Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems);
+std::vector<Constant*> const_packed_30_elems;
+const_packed_30_elems.push_back(const_int32_19);
+const_packed_30_elems.push_back(const_int32_23);
+ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10));
+const_packed_30_elems.push_back(const_int32_31);
+const_packed_30_elems.push_back(const_int32_24);
+Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems);
+std::vector<Constant*> const_packed_32_elems;
+const_packed_32_elems.push_back(const_int32_19);
+const_packed_32_elems.push_back(const_int32_23);
+const_packed_32_elems.push_back(const_int32_25);
+ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10));
+const_packed_32_elems.push_back(const_int32_33);
+Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems);
+std::vector<Constant*> const_ptr_34_indices;
+const_ptr_34_indices.push_back(const_int32_19);
+const_ptr_34_indices.push_back(const_int32_19);
+Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() );
+UndefValue* const_packed_35 = UndefValue::get(VectorTy_4);
+std::vector<Constant*> const_ptr_36_indices;
+const_ptr_36_indices.push_back(const_int32_19);
+const_ptr_36_indices.push_back(const_int32_19);
+Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() );
// Global Variable Definitions
-gvar_array__str->setInitializer(const_array_13);
-gvar_array__str1->setInitializer(const_array_14);
+gvar_array__str->setInitializer(const_array_14);
+gvar_array__str1->setInitializer(const_array_15);
// Function Definitions
@@ -224,12 +239,12 @@ gvar_array__str1->setInitializer(const_array_14);
BasicBlock* label_entry = new BasicBlock("entry",func_approx,0);
// Block entry (label_entry)
- FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_15, "cmp", label_entry);
- SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_15, float_b, "b.addr.0", label_entry);
- FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_16, "cmp3", label_entry);
- SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_16, float_b_addr_0, "b.addr.1", label_entry);
- FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_17, "cmp7", label_entry);
- SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_17, float_a, "a.addr.0", label_entry);
+ FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry);
+ SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry);
+ FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry);
+ SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry);
+ FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry);
+ SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry);
std::vector<Value*> float_call_params;
float_call_params.push_back(float_a_addr_0);
float_call_params.push_back(float_b_addr_1);
@@ -246,36 +261,36 @@ gvar_array__str1->setInitializer(const_array_14);
Value* packed_tmp = args++;
packed_tmp->setName("tmp");
- BasicBlock* label_entry_37 = new BasicBlock("entry",func_lit,0);
+ BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0);
BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0);
BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0);
- // Block entry (label_entry_37)
- ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_tmp, const_int32_18, "tmp7", label_entry_37);
- FCmpInst* int1_cmp_38 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp7, const_float_17, "cmp", label_entry_37);
- new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_38, label_entry_37);
+ // Block entry (label_entry_38)
+ ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp7", label_entry_38);
+ FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp7, const_float_18, "cmp", label_entry_38);
+ new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38);
// Block ifthen (label_ifthen)
- InsertElementInst* packed_tmp12 = new InsertElementInst(const_packed_19, float_tmp7, const_int32_22, "tmp12", label_ifthen);
- ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_22, "tmp14", label_ifthen);
- ExtractElementInst* float_tmp16 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp16", label_ifthen);
- FCmpInst* int1_cmp_i = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp16, const_float_15, "cmp.i", label_ifthen);
- SelectInst* float_b_addr_0_i = new SelectInst(int1_cmp_i, const_float_15, float_tmp16, "b.addr.0.i", label_ifthen);
- FCmpInst* int1_cmp3_i = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0_i, const_float_16, "cmp3.i", label_ifthen);
- SelectInst* float_b_addr_1_i = new SelectInst(int1_cmp3_i, const_float_16, float_b_addr_0_i, "b.addr.1.i", label_ifthen);
- FCmpInst* int1_cmp7_i = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp14, const_float_17, "cmp7.i", label_ifthen);
- SelectInst* float_a_addr_0_i = new SelectInst(int1_cmp7_i, const_float_17, float_tmp14, "a.addr.0.i", label_ifthen);
+ InsertElementInst* packed_tmp12 = new InsertElementInst(const_packed_20, float_tmp7, const_int32_23, "tmp12", label_ifthen);
+ ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp14", label_ifthen);
+ ExtractElementInst* float_tmp16 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp16", label_ifthen);
+ FCmpInst* int1_cmp_i = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp16, const_float_16, "cmp.i", label_ifthen);
+ SelectInst* float_b_addr_0_i = new SelectInst(int1_cmp_i, const_float_16, float_tmp16, "b.addr.0.i", label_ifthen);
+ FCmpInst* int1_cmp3_i = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0_i, const_float_17, "cmp3.i", label_ifthen);
+ SelectInst* float_b_addr_1_i = new SelectInst(int1_cmp3_i, const_float_17, float_b_addr_0_i, "b.addr.1.i", label_ifthen);
+ FCmpInst* int1_cmp7_i = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp14, const_float_18, "cmp7.i", label_ifthen);
+ SelectInst* float_a_addr_0_i = new SelectInst(int1_cmp7_i, const_float_18, float_tmp14, "a.addr.0.i", label_ifthen);
std::vector<Value*> float_call_i_params;
float_call_i_params.push_back(float_a_addr_0_i);
float_call_i_params.push_back(float_b_addr_1_i);
CallInst* float_call_i = new CallInst(func_powf, float_call_i_params.begin(), float_call_i_params.end(), "call.i", label_ifthen);
float_call_i->setCallingConv(CallingConv::C);
float_call_i->setTailCall(true);
- InsertElementInst* packed_tmp18 = new InsertElementInst(packed_tmp12, float_call_i, const_int32_24, "tmp18", label_ifthen);
+ InsertElementInst* packed_tmp18 = new InsertElementInst(packed_tmp12, float_call_i, const_int32_25, "tmp18", label_ifthen);
new ReturnInst(packed_tmp18, label_ifthen);
// Block UnifiedReturnBlock (label_UnifiedReturnBlock)
- new ReturnInst(const_packed_25, label_UnifiedReturnBlock);
+ new ReturnInst(const_packed_26, label_UnifiedReturnBlock);
}
@@ -289,7 +304,7 @@ gvar_array__str1->setInitializer(const_array_14);
Value* packed_tmp2 = args++;
packed_tmp2->setName("tmp2");
- BasicBlock* label_entry_42 = new BasicBlock("entry",func_cmp,0);
+ BasicBlock* label_entry_43 = new BasicBlock("entry",func_cmp,0);
BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0);
BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0);
BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0);
@@ -297,28 +312,28 @@ gvar_array__str1->setInitializer(const_array_14);
BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0);
BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0);
- // Block entry (label_entry_42)
- ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_18, "tmp3", label_entry_42);
- CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_42);
- FCmpInst* int1_cmp_43 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_26, "cmp", label_entry_42);
- ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_22, "tmp11", label_entry_42);
- CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_42);
- FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_26, "cmp13", label_entry_42);
- SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_43, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_42);
- new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_42);
+ // Block entry (label_entry_43)
+ ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_43);
+ CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_43);
+ FCmpInst* int1_cmp_44 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_43);
+ ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_43);
+ CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_43);
+ FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_43);
+ SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_44, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_43);
+ new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_43);
// Block cond.?14 (label_cond__14)
- ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_27, "tmp233", label_cond__14);
- ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp254", label_cond__14);
+ ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14);
+ ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14);
CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14);
- FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_26, "cmp276", label_cond__14);
+ FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14);
new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14);
// Block cond.cont20 (label_cond_cont20)
- ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_27, "tmp23", label_cond_cont20);
- ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp25", label_cond_cont20);
+ ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20);
+ ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20);
CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20);
- FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_26, "cmp27", label_cond_cont20);
+ FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20);
new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20);
// Block cond.?28 (label_cond__28)
@@ -327,10 +342,10 @@ gvar_array__str1->setInitializer(const_array_14);
packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14);
packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20);
- ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_29, "tmp378", label_cond__28);
- ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp399", label_cond__28);
+ ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28);
+ ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28);
CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28);
- FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_26, "cmp4111", label_cond__28);
+ FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28);
new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28);
// Block cond.cont34 (label_cond_cont34)
@@ -339,10 +354,10 @@ gvar_array__str1->setInitializer(const_array_14);
packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14);
packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20);
- ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_29, "tmp37", label_cond_cont34);
- ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp39", label_cond_cont34);
+ ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34);
+ ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34);
CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34);
- FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_26, "cmp41", label_cond_cont34);
+ FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34);
new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34);
// Block cond.?42 (label_cond__42)
@@ -351,7 +366,7 @@ gvar_array__str1->setInitializer(const_array_14);
packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28);
packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34);
- ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_31, "tmp5113", label_cond__42);
+ ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42);
new ReturnInst(packed_tmp5113, label_cond__42);
// Block cond.cont48 (label_cond_cont48)
@@ -360,7 +375,7 @@ gvar_array__str1->setInitializer(const_array_14);
packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28);
packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34);
- ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_31, "tmp51", label_cond_cont48);
+ ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48);
new ReturnInst(packed_tmp51, label_cond_cont48);
}
@@ -371,87 +386,125 @@ gvar_array__str1->setInitializer(const_array_14);
Value* packed_val = args++;
packed_val->setName("val");
- BasicBlock* label_entry_51 = new BasicBlock("entry",func_vcos,0);
-
- // Block entry (label_entry_51)
- ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_18, "tmp1", label_entry_51);
- CastInst* double_conv_52 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_51);
- ExtractElementInst* float_tmp3_53 = new ExtractElementInst(packed_val, const_int32_22, "tmp3", label_entry_51);
- CastInst* double_conv4 = new FPExtInst(float_tmp3_53, Type::DoubleTy, "conv4", label_entry_51);
- ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_val, const_int32_24, "tmp6", label_entry_51);
- CastInst* double_conv7 = new FPExtInst(float_tmp6, Type::DoubleTy, "conv7", label_entry_51);
- ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_23, "tmp9", label_entry_51);
- CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_51);
+ BasicBlock* label_entry_52 = new BasicBlock("entry",func_vcos,0);
+
+ // Block entry (label_entry_52)
+ ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_52);
+ CastInst* double_conv_53 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_52);
+ ExtractElementInst* float_tmp3_54 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_52);
+ CastInst* double_conv4 = new FPExtInst(float_tmp3_54, Type::DoubleTy, "conv4", label_entry_52);
+ ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_52);
+ CastInst* double_conv7 = new FPExtInst(float_tmp6, Type::DoubleTy, "conv7", label_entry_52);
+ ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_52);
+ CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_52);
std::vector<Value*> int32_call_params;
- int32_call_params.push_back(const_ptr_33);
- int32_call_params.push_back(double_conv_52);
+ int32_call_params.push_back(const_ptr_34);
+ int32_call_params.push_back(double_conv_53);
int32_call_params.push_back(double_conv4);
int32_call_params.push_back(double_conv7);
int32_call_params.push_back(double_conv10);
- CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_51);
+ CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_52);
int32_call->setCallingConv(CallingConv::C);
int32_call->setTailCall(true);
- CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_51);
+ CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_52);
float_call13->setCallingConv(CallingConv::C);
float_call13->setTailCall(true);
- InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_34, float_call13, const_int32_18, "tmp15", label_entry_51);
- InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call13, const_int32_22, "tmp20", label_entry_51);
- InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call13, const_int32_24, "tmp25", label_entry_51);
- InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call13, const_int32_23, "tmp30", label_entry_51);
- CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_51);
+ InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_52);
+ InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call13, const_int32_23, "tmp20", label_entry_52);
+ InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call13, const_int32_25, "tmp25", label_entry_52);
+ InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call13, const_int32_24, "tmp30", label_entry_52);
+ CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_52);
std::vector<Value*> int32_call43_params;
- int32_call43_params.push_back(const_ptr_35);
+ int32_call43_params.push_back(const_ptr_36);
int32_call43_params.push_back(double_conv33);
int32_call43_params.push_back(double_conv33);
int32_call43_params.push_back(double_conv33);
int32_call43_params.push_back(double_conv33);
- CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_51);
+ CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_52);
int32_call43->setCallingConv(CallingConv::C);
int32_call43->setTailCall(true);
- new ReturnInst(packed_tmp30, label_entry_51);
+ new ReturnInst(packed_tmp30, label_entry_52);
}
// Function: scs (func_scs)
{
Function::arg_iterator args = func_scs->arg_begin();
- Value* packed_val_55 = args++;
- packed_val_55->setName("val");
-
- BasicBlock* label_entry_56 = new BasicBlock("entry",func_scs,0);
-
- // Block entry (label_entry_56)
- ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_55, const_int32_18, "tmp2", label_entry_56);
- CallInst* float_call_57 = new CallInst(func_cosf, float_tmp2, "call", label_entry_56);
- float_call_57->setCallingConv(CallingConv::C);
- float_call_57->setTailCall(true);
- InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_34, float_call_57, const_int32_18, "tmp5", label_entry_56);
- CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_56);
+ Value* packed_val_56 = args++;
+ packed_val_56->setName("val");
+
+ BasicBlock* label_entry_57 = new BasicBlock("entry",func_scs,0);
+
+ // Block entry (label_entry_57)
+ ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_56, const_int32_19, "tmp2", label_entry_57);
+ CallInst* float_call_58 = new CallInst(func_cosf, float_tmp2, "call", label_entry_57);
+ float_call_58->setCallingConv(CallingConv::C);
+ float_call_58->setTailCall(true);
+ InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_58, const_int32_19, "tmp5", label_entry_57);
+ CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_57);
float_call7->setCallingConv(CallingConv::C);
float_call7->setTailCall(true);
- InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_22, "tmp9", label_entry_56);
- new ReturnInst(packed_tmp9, label_entry_56);
+ InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_57);
+ new ReturnInst(packed_tmp9, label_entry_57);
}
// Function: vsin (func_vsin)
{
Function::arg_iterator args = func_vsin->arg_begin();
- Value* packed_val_59 = args++;
- packed_val_59->setName("val");
-
- BasicBlock* label_entry_60 = new BasicBlock("entry",func_vsin,0);
-
- // Block entry (label_entry_60)
- ExtractElementInst* float_tmp2_61 = new ExtractElementInst(packed_val_59, const_int32_18, "tmp2", label_entry_60);
- CallInst* float_call_62 = new CallInst(func_sinf, float_tmp2_61, "call", label_entry_60);
- float_call_62->setCallingConv(CallingConv::C);
- float_call_62->setTailCall(true);
- InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_34, float_call_62, const_int32_18, "tmp6", label_entry_60);
- InsertElementInst* packed_tmp9_63 = new InsertElementInst(packed_tmp6, float_call_62, const_int32_22, "tmp9", label_entry_60);
- InsertElementInst* packed_tmp12_64 = new InsertElementInst(packed_tmp9_63, float_call_62, const_int32_24, "tmp12", label_entry_60);
- InsertElementInst* packed_tmp15_65 = new InsertElementInst(packed_tmp12_64, float_call_62, const_int32_23, "tmp15", label_entry_60);
- new ReturnInst(packed_tmp15_65, label_entry_60);
+ Value* packed_val_60 = args++;
+ packed_val_60->setName("val");
+
+ BasicBlock* label_entry_61 = new BasicBlock("entry",func_vsin,0);
+
+ // Block entry (label_entry_61)
+ ExtractElementInst* float_tmp2_62 = new ExtractElementInst(packed_val_60, const_int32_19, "tmp2", label_entry_61);
+ CallInst* float_call_63 = new CallInst(func_sinf, float_tmp2_62, "call", label_entry_61);
+ float_call_63->setCallingConv(CallingConv::C);
+ float_call_63->setTailCall(true);
+ InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_63, const_int32_19, "tmp6", label_entry_61);
+ InsertElementInst* packed_tmp9_64 = new InsertElementInst(packed_tmp6, float_call_63, const_int32_23, "tmp9", label_entry_61);
+ InsertElementInst* packed_tmp12_65 = new InsertElementInst(packed_tmp9_64, float_call_63, const_int32_25, "tmp12", label_entry_61);
+ InsertElementInst* packed_tmp15_66 = new InsertElementInst(packed_tmp12_65, float_call_63, const_int32_24, "tmp15", label_entry_61);
+ new ReturnInst(packed_tmp15_66, label_entry_61);
+
+}
+
+// Function: kilp (func_kilp)
+{
+ Function::arg_iterator args = func_kilp->arg_begin();
+ Value* packed_val_68 = args++;
+ packed_val_68->setName("val");
+
+ BasicBlock* label_entry_69 = new BasicBlock("entry",func_kilp,0);
+ BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0);
+ BasicBlock* label_lor_rhs6 = new BasicBlock("lor_rhs6",func_kilp,0);
+ BasicBlock* label_lor_rhs13 = new BasicBlock("lor_rhs13",func_kilp,0);
+ BasicBlock* label_UnifiedReturnBlock_70 = new BasicBlock("UnifiedReturnBlock",func_kilp,0);
+
+ // Block entry (label_entry_69)
+ ExtractElementInst* float_tmp1_71 = new ExtractElementInst(packed_val_68, const_int32_19, "tmp1", label_entry_69);
+ FCmpInst* int1_cmp_72 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_71, const_float_18, "cmp", label_entry_69);
+ new BranchInst(label_UnifiedReturnBlock_70, label_lor_rhs, int1_cmp_72, label_entry_69);
+
+ // Block lor_rhs (label_lor_rhs)
+ ExtractElementInst* float_tmp3_74 = new ExtractElementInst(packed_val_68, const_int32_23, "tmp3", label_lor_rhs);
+ FCmpInst* int1_cmp5 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_74, const_float_18, "cmp5", label_lor_rhs);
+ new BranchInst(label_UnifiedReturnBlock_70, label_lor_rhs6, int1_cmp5, label_lor_rhs);
+
+ // Block lor_rhs6 (label_lor_rhs6)
+ ExtractElementInst* float_tmp8 = new ExtractElementInst(packed_val_68, const_int32_25, "tmp8", label_lor_rhs6);
+ FCmpInst* int1_cmp10 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp8, const_float_18, "cmp10", label_lor_rhs6);
+ new BranchInst(label_UnifiedReturnBlock_70, label_lor_rhs13, int1_cmp10, label_lor_rhs6);
+
+ // Block lor_rhs13 (label_lor_rhs13)
+ ExtractElementInst* float_tmp15 = new ExtractElementInst(packed_val_68, const_int32_24, "tmp15", label_lor_rhs13);
+ FCmpInst* int1_cmp17 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp15, const_float_18, "cmp17", label_lor_rhs13);
+ CastInst* int32_retval = new ZExtInst(int1_cmp17, IntegerType::get(32), "retval", label_lor_rhs13);
+ new ReturnInst(int32_retval, label_lor_rhs13);
+
+ // Block UnifiedReturnBlock (label_UnifiedReturnBlock_70)
+ new ReturnInst(const_int32_23, label_UnifiedReturnBlock_70);
}
diff --git a/src/mesa/pipe/llvm/instructions.cpp b/src/mesa/pipe/llvm/instructions.cpp
index 232dd9cd5d..c8d1992587 100644
--- a/src/mesa/pipe/llvm/instructions.cpp
+++ b/src/mesa/pipe/llvm/instructions.cpp
@@ -732,14 +732,10 @@ void Instructions::end()
m_builder.CreateRetVoid();
}
-void Instructions::cal(int label, llvm::Value *out, llvm::Value *in,
- llvm::Value *cst, llvm::Value *temp)
+void Instructions::cal(int label, llvm::Value *input)
{
std::vector<Value*> params;
- params.push_back(out);
- params.push_back(in);
- params.push_back(cst);
- params.push_back(temp);
+ params.push_back(input);
llvm::Function *func = findFunction(label);
m_builder.CreateCall(func, params.begin(), params.end());
@@ -773,15 +769,9 @@ void Instructions::bgnSub(unsigned label)
llvm::Function *func = findFunction(label);
Function::arg_iterator args = func->arg_begin();
- Value *ptr_OUT = args++;
- ptr_OUT->setName("OUT");
- Value *ptr_IN = args++;
- ptr_IN->setName("IN");
- Value *ptr_CONST = args++;
- ptr_CONST->setName("CONST");
- Value *ptr_TEMP = args++;
- ptr_TEMP->setName("TEMP");
- m_storage->pushArguments(ptr_OUT, ptr_IN, ptr_CONST, ptr_TEMP);
+ Value *ptr_INPUT = args++;
+ ptr_INPUT->setName("INPUT");
+ m_storage->pushArguments(ptr_INPUT);
llvm::BasicBlock *entry = new BasicBlock("entry", func, 0);
@@ -874,6 +864,15 @@ llvm::Value * Instructions::scs(llvm::Value *in)
return call;
}
+llvm::Value * Instructions::kilp(llvm::Value *in)
+{
+ llvm::Function *func = m_mod->getFunction("kilp");
+ assert(func);
+
+ CallInst *call = m_builder.CreateCall(func, in, name("kilpres"));
+ call->setTailCall(false);
+ return call;
+}
llvm::Value * Instructions::sin(llvm::Value *in)
{
@@ -886,3 +885,4 @@ llvm::Value * Instructions::sin(llvm::Value *in)
}
#endif //MESA_LLVM
+
diff --git a/src/mesa/pipe/llvm/instructions.h b/src/mesa/pipe/llvm/instructions.h
index e9bfc9d740..9ebc17dd8e 100644
--- a/src/mesa/pipe/llvm/instructions.h
+++ b/src/mesa/pipe/llvm/instructions.h
@@ -62,8 +62,7 @@ public:
void beginLoop();
void bgnSub(unsigned);
void brk();
- void cal(int label, llvm::Value *out, llvm::Value *in,
- llvm::Value *cst, llvm::Value *tmp);
+ void cal(int label, llvm::Value *input);
llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3);
llvm::Value *cos(llvm::Value *in);
llvm::Value *cross(llvm::Value *in1, llvm::Value *in2);
@@ -80,6 +79,7 @@ public:
llvm::Value *floor(llvm::Value *in);
llvm::Value *frc(llvm::Value *in);
void ifop(llvm::Value *in);
+ llvm::Value *kilp(llvm::Value *in);
llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2,
llvm::Value *in3);
llvm::Value *lit(llvm::Value *in);
diff --git a/src/mesa/pipe/llvm/llvm_base_shader.cpp b/src/mesa/pipe/llvm/llvm_base_shader.cpp
index f141ea2da0..85fa389d79 100644
--- a/src/mesa/pipe/llvm/llvm_base_shader.cpp
+++ b/src/mesa/pipe/llvm/llvm_base_shader.cpp
@@ -4,8 +4,23 @@
Module* createBaseShader() {
// Module Construction
Module* mod = new Module("Shader");
+ mod->setDataLayout("");
+ mod->setTargetTriple("i686-apple-darwin9");
// Type Definitions
+ std::vector<const Type*>StructTy_struct_ShaderInput_fields;
+ VectorType* VectorTy_1 = VectorType::get(Type::FloatTy, 4);
+
+ PointerType* PointerTy_0 = PointerType::get(VectorTy_1);
+
+ StructTy_struct_ShaderInput_fields.push_back(PointerTy_0);
+ StructTy_struct_ShaderInput_fields.push_back(PointerTy_0);
+ StructTy_struct_ShaderInput_fields.push_back(PointerTy_0);
+ StructTy_struct_ShaderInput_fields.push_back(PointerTy_0);
+ StructTy_struct_ShaderInput_fields.push_back(IntegerType::get(32));
+ StructType* StructTy_struct_ShaderInput = StructType::get(StructTy_struct_ShaderInput_fields, /*isPacked=*/false);
+ mod->addTypeName("struct.ShaderInput", StructTy_struct_ShaderInput);
+
OpaqueType* OpaqueTy_struct_pipe_mipmap_tree = OpaqueType::get();
mod->addTypeName("struct.pipe_mipmap_tree", OpaqueTy_struct_pipe_mipmap_tree);
@@ -16,77 +31,73 @@ Module* createBaseShader() {
mod->addTypeName("struct.softpipe_tile_cache", OpaqueTy_struct_softpipe_tile_cache);
std::vector<const Type*>StructTy_struct_tgsi_sampler_fields;
- PointerType* PointerTy_0 = PointerType::get(OpaqueTy_struct_pipe_sampler_state);
+ PointerType* PointerTy_2 = PointerType::get(OpaqueTy_struct_pipe_sampler_state);
- StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_0);
- PointerType* PointerTy_1 = PointerType::get(OpaqueTy_struct_pipe_mipmap_tree);
+ StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_2);
+ PointerType* PointerTy_3 = PointerType::get(OpaqueTy_struct_pipe_mipmap_tree);
- StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_1);
- std::vector<const Type*>FuncTy_3_args;
+ StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_3);
+ std::vector<const Type*>FuncTy_5_args;
PATypeHolder StructTy_struct_tgsi_sampler_fwd = OpaqueType::get();
- PointerType* PointerTy_4 = PointerType::get(StructTy_struct_tgsi_sampler_fwd);
+ PointerType* PointerTy_6 = PointerType::get(StructTy_struct_tgsi_sampler_fwd);
- FuncTy_3_args.push_back(PointerTy_4);
- PointerType* PointerTy_5 = PointerType::get(Type::FloatTy);
+ FuncTy_5_args.push_back(PointerTy_6);
+ PointerType* PointerTy_7 = PointerType::get(Type::FloatTy);
- FuncTy_3_args.push_back(PointerTy_5);
- FuncTy_3_args.push_back(PointerTy_5);
- FuncTy_3_args.push_back(PointerTy_5);
- FuncTy_3_args.push_back(Type::FloatTy);
- ArrayType* ArrayTy_7 = ArrayType::get(Type::FloatTy, 4);
+ FuncTy_5_args.push_back(PointerTy_7);
+ FuncTy_5_args.push_back(PointerTy_7);
+ FuncTy_5_args.push_back(PointerTy_7);
+ FuncTy_5_args.push_back(Type::FloatTy);
+ ArrayType* ArrayTy_9 = ArrayType::get(Type::FloatTy, 4);
- PointerType* PointerTy_6 = PointerType::get(ArrayTy_7);
+ PointerType* PointerTy_8 = PointerType::get(ArrayTy_9);
- FuncTy_3_args.push_back(PointerTy_6);
- ParamAttrsList *FuncTy_3_PAL = 0;
- FunctionType* FuncTy_3 = FunctionType::get(
+ FuncTy_5_args.push_back(PointerTy_8);
+ ParamAttrsList *FuncTy_5_PAL = 0;
+ FunctionType* FuncTy_5 = FunctionType::get(
/*Result=*/Type::VoidTy,
- /*Params=*/FuncTy_3_args,
+ /*Params=*/FuncTy_5_args,
/*isVarArg=*/false,
- /*ParamAttrs=*/FuncTy_3_PAL);
+ /*ParamAttrs=*/FuncTy_5_PAL);
- PointerType* PointerTy_2 = PointerType::get(FuncTy_3);
+ PointerType* PointerTy_4 = PointerType::get(FuncTy_5);
- StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_2);
- PointerType* PointerTy_8 = PointerType::get(IntegerType::get(8));
+ StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_4);
+ PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8));
- StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_8);
- PointerType* PointerTy_9 = PointerType::get(OpaqueTy_struct_softpipe_tile_cache);
+ StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_10);
+ PointerType* PointerTy_11 = PointerType::get(OpaqueTy_struct_softpipe_tile_cache);
- StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_9);
+ StructTy_struct_tgsi_sampler_fields.push_back(PointerTy_11);
StructType* StructTy_struct_tgsi_sampler = StructType::get(StructTy_struct_tgsi_sampler_fields, /*isPacked=*/false);
mod->addTypeName("struct.tgsi_sampler", StructTy_struct_tgsi_sampler);
cast<OpaqueType>(StructTy_struct_tgsi_sampler_fwd.get())->refineAbstractTypeTo(StructTy_struct_tgsi_sampler);
StructTy_struct_tgsi_sampler = cast<StructType>(StructTy_struct_tgsi_sampler_fwd.get());
- std::vector<const Type*>FuncTy_10_args;
- VectorType* VectorTy_13 = VectorType::get(Type::FloatTy, 4);
+ std::vector<const Type*>FuncTy_12_args;
+ ArrayType* ArrayTy_14 = ArrayType::get(VectorTy_1, 16);
- ArrayType* ArrayTy_12 = ArrayType::get(VectorTy_13, 16);
+ PointerType* PointerTy_13 = PointerType::get(ArrayTy_14);
- PointerType* PointerTy_11 = PointerType::get(ArrayTy_12);
+ FuncTy_12_args.push_back(PointerTy_13);
+ ArrayType* ArrayTy_16 = ArrayType::get(ArrayTy_9, 16);
- FuncTy_10_args.push_back(PointerTy_11);
- ArrayType* ArrayTy_15 = ArrayType::get(ArrayTy_7, 16);
+ PointerType* PointerTy_15 = PointerType::get(ArrayTy_16);
- PointerType* PointerTy_14 = PointerType::get(ArrayTy_15);
-
- FuncTy_10_args.push_back(PointerTy_14);
- FuncTy_10_args.push_back(IntegerType::get(32));
- FuncTy_10_args.push_back(IntegerType::get(32));
- ParamAttrsList *FuncTy_10_PAL = 0;
- FunctionType* FuncTy_10 = FunctionType::get(
+ FuncTy_12_args.push_back(PointerTy_15);
+ FuncTy_12_args.push_back(IntegerType::get(32));
+ FuncTy_12_args.push_back(IntegerType::get(32));
+ ParamAttrsList *FuncTy_12_PAL = 0;
+ FunctionType* FuncTy_12 = FunctionType::get(
/*Result=*/Type::VoidTy,
- /*Params=*/FuncTy_10_args,
+ /*Params=*/FuncTy_12_args,
/*isVarArg=*/false,
- /*ParamAttrs=*/FuncTy_10_PAL);
-
- PointerType* PointerTy_16 = PointerType::get(VectorTy_13);
+ /*ParamAttrs=*/FuncTy_12_PAL);
std::vector<const Type*>FuncTy_17_args;
- FuncTy_17_args.push_back(PointerTy_16);
- FuncTy_17_args.push_back(PointerTy_6);
+ FuncTy_17_args.push_back(PointerTy_0);
+ FuncTy_17_args.push_back(PointerTy_8);
FuncTy_17_args.push_back(IntegerType::get(32));
ParamAttrsList *FuncTy_17_PAL = 0;
FunctionType* FuncTy_17 = FunctionType::get(
@@ -96,8 +107,8 @@ Module* createBaseShader() {
/*ParamAttrs=*/FuncTy_17_PAL);
std::vector<const Type*>FuncTy_18_args;
- FuncTy_18_args.push_back(PointerTy_6);
- FuncTy_18_args.push_back(PointerTy_16);
+ FuncTy_18_args.push_back(PointerTy_8);
+ FuncTy_18_args.push_back(PointerTy_0);
FuncTy_18_args.push_back(IntegerType::get(32));
ParamAttrsList *FuncTy_18_PAL = 0;
FunctionType* FuncTy_18 = FunctionType::get(
@@ -107,9 +118,9 @@ Module* createBaseShader() {
/*ParamAttrs=*/FuncTy_18_PAL);
std::vector<const Type*>FuncTy_19_args;
- FuncTy_19_args.push_back(PointerTy_14);
- FuncTy_19_args.push_back(PointerTy_14);
- FuncTy_19_args.push_back(PointerTy_6);
+ FuncTy_19_args.push_back(PointerTy_13);
+ FuncTy_19_args.push_back(PointerTy_13);
+ FuncTy_19_args.push_back(PointerTy_8);
FuncTy_19_args.push_back(IntegerType::get(32));
FuncTy_19_args.push_back(IntegerType::get(32));
FuncTy_19_args.push_back(IntegerType::get(32));
@@ -121,23 +132,20 @@ Module* createBaseShader() {
/*isVarArg=*/false,
/*ParamAttrs=*/FuncTy_19_PAL);
- ArrayType* ArrayTy_21 = ArrayType::get(ArrayTy_12, 2048);
+ ArrayType* ArrayTy_21 = ArrayType::get(VectorTy_1, 32);
PointerType* PointerTy_20 = PointerType::get(ArrayTy_21);
- ArrayType* ArrayTy_23 = ArrayType::get(VectorTy_13, 32);
+ ArrayType* ArrayTy_23 = ArrayType::get(VectorTy_1, 128);
PointerType* PointerTy_22 = PointerType::get(ArrayTy_23);
- ArrayType* ArrayTy_25 = ArrayType::get(VectorTy_13, 128);
+ PointerType* PointerTy_24 = PointerType::get(StructTy_struct_ShaderInput);
- PointerType* PointerTy_24 = PointerType::get(ArrayTy_25);
+ PointerType* PointerTy_25 = PointerType::get(PointerTy_0);
std::vector<const Type*>FuncTy_27_args;
- FuncTy_27_args.push_back(PointerTy_16);
- FuncTy_27_args.push_back(PointerTy_16);
- FuncTy_27_args.push_back(PointerTy_16);
- FuncTy_27_args.push_back(PointerTy_16);
+ FuncTy_27_args.push_back(PointerTy_24);
ParamAttrsList *FuncTy_27_PAL = 0;
FunctionType* FuncTy_27 = FunctionType::get(
/*Result=*/Type::VoidTy,
@@ -150,12 +158,12 @@ Module* createBaseShader() {
std::vector<const Type*>FuncTy_28_args;
FuncTy_28_args.push_back(Type::FloatTy);
FuncTy_28_args.push_back(Type::FloatTy);
- FuncTy_28_args.push_back(PointerTy_14);
- FuncTy_28_args.push_back(PointerTy_14);
+ FuncTy_28_args.push_back(PointerTy_13);
+ FuncTy_28_args.push_back(PointerTy_13);
FuncTy_28_args.push_back(IntegerType::get(32));
- FuncTy_28_args.push_back(PointerTy_6);
+ FuncTy_28_args.push_back(PointerTy_8);
FuncTy_28_args.push_back(IntegerType::get(32));
- FuncTy_28_args.push_back(PointerTy_4);
+ FuncTy_28_args.push_back(PointerTy_6);
PointerType* PointerTy_29 = PointerType::get(IntegerType::get(32));
FuncTy_28_args.push_back(PointerTy_29);
@@ -166,15 +174,11 @@ Module* createBaseShader() {
/*isVarArg=*/false,
/*ParamAttrs=*/FuncTy_28_PAL);
- ArrayType* ArrayTy_31 = ArrayType::get(ArrayTy_12, 4);
-
- PointerType* PointerTy_30 = PointerType::get(ArrayTy_31);
-
// Function Declarations
Function* func_from_array = new Function(
- /*Type=*/FuncTy_10,
+ /*Type=*/FuncTy_12,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"from_array", mod);
func_from_array->setCallingConv(CallingConv::C);
@@ -213,13 +217,13 @@ Module* createBaseShader() {
// Constant Definitions
- Constant* const_int32_32 = Constant::getNullValue(IntegerType::get(32));
- UndefValue* const_packed_33 = UndefValue::get(VectorTy_13);
- ConstantInt* const_int32_34 = ConstantInt::get(APInt(32, "1", 10));
- ConstantInt* const_int32_35 = ConstantInt::get(APInt(32, "2", 10));
- ConstantInt* const_int32_36 = ConstantInt::get(APInt(32, "3", 10));
- ConstantInt* const_int32_37 = ConstantInt::get(APInt(32, "4", 10));
- ConstantInt* const_int32_38 = ConstantInt::get(APInt(32, "-1", 10));
+ Constant* const_int32_30 = Constant::getNullValue(IntegerType::get(32));
+ UndefValue* const_packed_31 = UndefValue::get(VectorTy_1);
+ ConstantInt* const_int32_32 = ConstantInt::get(APInt(32, "1", 10));
+ ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "2", 10));
+ ConstantInt* const_int32_34 = ConstantInt::get(APInt(32, "3", 10));
+ ConstantInt* const_int32_35 = ConstantInt::get(APInt(32, "4", 10));
+ ConstantInt* const_int32_36 = ConstantInt::get(APInt(32, "-1", 10));
// Global Variable Definitions
@@ -243,73 +247,73 @@ Module* createBaseShader() {
BasicBlock* label_afterfor60 = new BasicBlock("afterfor60",func_from_array,0);
// Block entry (label_entry)
- ICmpInst* int1_cmp = new ICmpInst(ICmpInst::ICMP_SGT, int32_count, const_int32_32, "cmp", label_entry);
- ICmpInst* int1_cmp5 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_attribs, const_int32_32, "cmp5", label_entry);
+ ICmpInst* int1_cmp = new ICmpInst(ICmpInst::ICMP_SGT, int32_count, const_int32_30, "cmp", label_entry);
+ ICmpInst* int1_cmp5 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_attribs, const_int32_30, "cmp5", label_entry);
BinaryOperator* int1_bothcond = BinaryOperator::create(Instruction::And, int1_cmp, int1_cmp5, "bothcond", label_entry);
new BranchInst(label_forbody6, label_afterfor60, int1_bothcond, label_entry);
// Block forbody6 (label_forbody6)
- Argument* fwdref_40 = new Argument(IntegerType::get(32));
- Argument* fwdref_41 = new Argument(IntegerType::get(32));
+ Argument* fwdref_38 = new Argument(IntegerType::get(32));
+ Argument* fwdref_39 = new Argument(IntegerType::get(32));
PHINode* int32_i_0_reg2mem_0 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody6);
int32_i_0_reg2mem_0->reserveOperandSpace(3);
- int32_i_0_reg2mem_0->addIncoming(const_int32_32, label_entry);
- int32_i_0_reg2mem_0->addIncoming(fwdref_40, label_forinc57);
- int32_i_0_reg2mem_0->addIncoming(fwdref_41, label_forbody6);
+ int32_i_0_reg2mem_0->addIncoming(const_int32_30, label_entry);
+ int32_i_0_reg2mem_0->addIncoming(fwdref_38, label_forinc57);
+ int32_i_0_reg2mem_0->addIncoming(fwdref_39, label_forbody6);
- Argument* fwdref_42 = new Argument(IntegerType::get(32));
+ Argument* fwdref_40 = new Argument(IntegerType::get(32));
PHINode* int32_j_0_reg2mem_0 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0", label_forbody6);
int32_j_0_reg2mem_0->reserveOperandSpace(3);
- int32_j_0_reg2mem_0->addIncoming(fwdref_42, label_forbody6);
- int32_j_0_reg2mem_0->addIncoming(const_int32_32, label_forinc57);
- int32_j_0_reg2mem_0->addIncoming(const_int32_32, label_entry);
+ int32_j_0_reg2mem_0->addIncoming(fwdref_40, label_forbody6);
+ int32_j_0_reg2mem_0->addIncoming(const_int32_30, label_forinc57);
+ int32_j_0_reg2mem_0->addIncoming(const_int32_30, label_entry);
- Argument* fwdref_43 = new Argument(VectorTy_13);
- PHINode* packed_vec_0_reg2mem_0 = new PHINode(VectorTy_13, "vec.0.reg2mem.0", label_forbody6);
+ Argument* fwdref_41 = new Argument(VectorTy_1);
+ PHINode* packed_vec_0_reg2mem_0 = new PHINode(VectorTy_1, "vec.0.reg2mem.0", label_forbody6);
packed_vec_0_reg2mem_0->reserveOperandSpace(3);
- packed_vec_0_reg2mem_0->addIncoming(fwdref_43, label_forbody6);
- packed_vec_0_reg2mem_0->addIncoming(const_packed_33, label_entry);
- packed_vec_0_reg2mem_0->addIncoming(fwdref_43, label_forinc57);
+ packed_vec_0_reg2mem_0->addIncoming(fwdref_41, label_forbody6);
+ packed_vec_0_reg2mem_0->addIncoming(const_packed_31, label_entry);
+ packed_vec_0_reg2mem_0->addIncoming(fwdref_41, label_forinc57);
std::vector<Value*> ptr_arraydecay11_indices;
ptr_arraydecay11_indices.push_back(int32_i_0_reg2mem_0);
ptr_arraydecay11_indices.push_back(int32_j_0_reg2mem_0);
- ptr_arraydecay11_indices.push_back(const_int32_32);
+ ptr_arraydecay11_indices.push_back(const_int32_30);
Instruction* ptr_arraydecay11 = new GetElementPtrInst(ptr_ainputs, ptr_arraydecay11_indices.begin(), ptr_arraydecay11_indices.end(), "arraydecay11", label_forbody6);
LoadInst* float_tmp13 = new LoadInst(ptr_arraydecay11, "tmp13", false, label_forbody6);
- InsertElementInst* packed_tmp15 = new InsertElementInst(packed_vec_0_reg2mem_0, float_tmp13, const_int32_32, "tmp15", label_forbody6);
+ InsertElementInst* packed_tmp15 = new InsertElementInst(packed_vec_0_reg2mem_0, float_tmp13, const_int32_30, "tmp15", label_forbody6);
std::vector<Value*> ptr_arrayidx23_indices;
ptr_arrayidx23_indices.push_back(int32_i_0_reg2mem_0);
ptr_arrayidx23_indices.push_back(int32_j_0_reg2mem_0);
- ptr_arrayidx23_indices.push_back(const_int32_34);
+ ptr_arrayidx23_indices.push_back(const_int32_32);
Instruction* ptr_arrayidx23 = new GetElementPtrInst(ptr_ainputs, ptr_arrayidx23_indices.begin(), ptr_arrayidx23_indices.end(), "arrayidx23", label_forbody6);
LoadInst* float_tmp24 = new LoadInst(ptr_arrayidx23, "tmp24", false, label_forbody6);
- InsertElementInst* packed_tmp26 = new InsertElementInst(packed_tmp15, float_tmp24, const_int32_34, "tmp26", label_forbody6);
+ InsertElementInst* packed_tmp26 = new InsertElementInst(packed_tmp15, float_tmp24, const_int32_32, "tmp26", label_forbody6);
std::vector<Value*> ptr_arrayidx34_indices;
ptr_arrayidx34_indices.push_back(int32_i_0_reg2mem_0);
ptr_arrayidx34_indices.push_back(int32_j_0_reg2mem_0);
- ptr_arrayidx34_indices.push_back(const_int32_35);
+ ptr_arrayidx34_indices.push_back(const_int32_33);
Instruction* ptr_arrayidx34 = new GetElementPtrInst(ptr_ainputs, ptr_arrayidx34_indices.begin(), ptr_arrayidx34_indices.end(), "arrayidx34", label_forbody6);
LoadInst* float_tmp35 = new LoadInst(ptr_arrayidx34, "tmp35", false, label_forbody6);
- InsertElementInst* packed_tmp37 = new InsertElementInst(packed_tmp26, float_tmp35, const_int32_35, "tmp37", label_forbody6);
+ InsertElementInst* packed_tmp37 = new InsertElementInst(packed_tmp26, float_tmp35, const_int32_33, "tmp37", label_forbody6);
std::vector<Value*> ptr_arrayidx45_indices;
ptr_arrayidx45_indices.push_back(int32_i_0_reg2mem_0);
ptr_arrayidx45_indices.push_back(int32_j_0_reg2mem_0);
- ptr_arrayidx45_indices.push_back(const_int32_36);
+ ptr_arrayidx45_indices.push_back(const_int32_34);
Instruction* ptr_arrayidx45 = new GetElementPtrInst(ptr_ainputs, ptr_arrayidx45_indices.begin(), ptr_arrayidx45_indices.end(), "arrayidx45", label_forbody6);
LoadInst* float_tmp46 = new LoadInst(ptr_arrayidx45, "tmp46", false, label_forbody6);
- InsertElementInst* packed_tmp48 = new InsertElementInst(packed_tmp37, float_tmp46, const_int32_36, "tmp48", label_forbody6);
+ InsertElementInst* packed_tmp48 = new InsertElementInst(packed_tmp37, float_tmp46, const_int32_34, "tmp48", label_forbody6);
std::vector<Value*> ptr_arrayidx54_indices;
ptr_arrayidx54_indices.push_back(int32_i_0_reg2mem_0);
ptr_arrayidx54_indices.push_back(int32_j_0_reg2mem_0);
Instruction* ptr_arrayidx54 = new GetElementPtrInst(ptr_res, ptr_arrayidx54_indices.begin(), ptr_arrayidx54_indices.end(), "arrayidx54", label_forbody6);
- StoreInst* void_44 = new StoreInst(packed_tmp48, ptr_arrayidx54, false, label_forbody6);
- BinaryOperator* int32_inc = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0, const_int32_34, "inc", label_forbody6);
+ StoreInst* void_42 = new StoreInst(packed_tmp48, ptr_arrayidx54, false, label_forbody6);
+ BinaryOperator* int32_inc = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0, const_int32_32, "inc", label_forbody6);
ICmpInst* int1_cmp59 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc, int32_num_attribs, "cmp59", label_forbody6);
new BranchInst(label_forbody6, label_forinc57, int1_cmp59, label_forbody6);
// Block forinc57 (label_forinc57)
- BinaryOperator* int32_inc59 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0, const_int32_34, "inc59", label_forinc57);
+ BinaryOperator* int32_inc59 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0, const_int32_32, "inc59", label_forinc57);
ICmpInst* int1_cmp17 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc59, int32_count, "cmp17", label_forinc57);
new BranchInst(label_forbody6, label_afterfor60, int1_cmp17, label_forinc57);
@@ -317,80 +321,80 @@ Module* createBaseShader() {
new ReturnInst(label_afterfor60);
// Resolve Forward References
- fwdref_41->replaceAllUsesWith(int32_i_0_reg2mem_0); delete fwdref_41;
- fwdref_43->replaceAllUsesWith(packed_tmp48); delete fwdref_43;
- fwdref_42->replaceAllUsesWith(int32_inc); delete fwdref_42;
- fwdref_40->replaceAllUsesWith(int32_inc59); delete fwdref_40;
+ fwdref_39->replaceAllUsesWith(int32_i_0_reg2mem_0); delete fwdref_39;
+ fwdref_41->replaceAllUsesWith(packed_tmp48); delete fwdref_41;
+ fwdref_40->replaceAllUsesWith(int32_inc); delete fwdref_40;
+ fwdref_38->replaceAllUsesWith(int32_inc59); delete fwdref_38;
}
// Function: from_consts (func_from_consts)
{
Function::arg_iterator args = func_from_consts->arg_begin();
- Value* ptr_res_48 = args++;
- ptr_res_48->setName("res");
- Value* ptr_ainputs_49 = args++;
- ptr_ainputs_49->setName("ainputs");
- Value* int32_count_50 = args++;
- int32_count_50->setName("count");
-
- BasicBlock* label_entry_51 = new BasicBlock("entry",func_from_consts,0);
+ Value* ptr_res_46 = args++;
+ ptr_res_46->setName("res");
+ Value* ptr_ainputs_47 = args++;
+ ptr_ainputs_47->setName("ainputs");
+ Value* int32_count_48 = args++;
+ int32_count_48->setName("count");
+
+ BasicBlock* label_entry_49 = new BasicBlock("entry",func_from_consts,0);
BasicBlock* label_forbody = new BasicBlock("forbody",func_from_consts,0);
BasicBlock* label_afterfor = new BasicBlock("afterfor",func_from_consts,0);
- // Block entry (label_entry_51)
- ICmpInst* int1_cmp_52 = new ICmpInst(ICmpInst::ICMP_SGT, int32_count_50, const_int32_32, "cmp", label_entry_51);
- new BranchInst(label_forbody, label_afterfor, int1_cmp_52, label_entry_51);
+ // Block entry (label_entry_49)
+ ICmpInst* int1_cmp_50 = new ICmpInst(ICmpInst::ICMP_SGT, int32_count_48, const_int32_30, "cmp", label_entry_49);
+ new BranchInst(label_forbody, label_afterfor, int1_cmp_50, label_entry_49);
// Block forbody (label_forbody)
- Argument* fwdref_55 = new Argument(IntegerType::get(32));
- PHINode* int32_i_0_reg2mem_0_54 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody);
- int32_i_0_reg2mem_0_54->reserveOperandSpace(2);
- int32_i_0_reg2mem_0_54->addIncoming(const_int32_32, label_entry_51);
- int32_i_0_reg2mem_0_54->addIncoming(fwdref_55, label_forbody);
-
- Argument* fwdref_57 = new Argument(VectorTy_13);
- PHINode* packed_vec_0_reg2mem_0_56 = new PHINode(VectorTy_13, "vec.0.reg2mem.0", label_forbody);
- packed_vec_0_reg2mem_0_56->reserveOperandSpace(2);
- packed_vec_0_reg2mem_0_56->addIncoming(const_packed_33, label_entry_51);
- packed_vec_0_reg2mem_0_56->addIncoming(fwdref_57, label_forbody);
+ Argument* fwdref_53 = new Argument(IntegerType::get(32));
+ PHINode* int32_i_0_reg2mem_0_52 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody);
+ int32_i_0_reg2mem_0_52->reserveOperandSpace(2);
+ int32_i_0_reg2mem_0_52->addIncoming(const_int32_30, label_entry_49);
+ int32_i_0_reg2mem_0_52->addIncoming(fwdref_53, label_forbody);
+
+ Argument* fwdref_55 = new Argument(VectorTy_1);
+ PHINode* packed_vec_0_reg2mem_0_54 = new PHINode(VectorTy_1, "vec.0.reg2mem.0", label_forbody);
+ packed_vec_0_reg2mem_0_54->reserveOperandSpace(2);
+ packed_vec_0_reg2mem_0_54->addIncoming(const_packed_31, label_entry_49);
+ packed_vec_0_reg2mem_0_54->addIncoming(fwdref_55, label_forbody);
std::vector<Value*> ptr_arraydecay_indices;
- ptr_arraydecay_indices.push_back(int32_i_0_reg2mem_0_54);
- ptr_arraydecay_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay = new GetElementPtrInst(ptr_ainputs_49, ptr_arraydecay_indices.begin(), ptr_arraydecay_indices.end(), "arraydecay", label_forbody);
+ ptr_arraydecay_indices.push_back(int32_i_0_reg2mem_0_52);
+ ptr_arraydecay_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay = new GetElementPtrInst(ptr_ainputs_47, ptr_arraydecay_indices.begin(), ptr_arraydecay_indices.end(), "arraydecay", label_forbody);
LoadInst* float_tmp5 = new LoadInst(ptr_arraydecay, "tmp5", false, label_forbody);
- InsertElementInst* packed_tmp7 = new InsertElementInst(packed_vec_0_reg2mem_0_56, float_tmp5, const_int32_32, "tmp7", label_forbody);
+ InsertElementInst* packed_tmp7 = new InsertElementInst(packed_vec_0_reg2mem_0_54, float_tmp5, const_int32_30, "tmp7", label_forbody);
std::vector<Value*> ptr_arrayidx12_indices;
- ptr_arrayidx12_indices.push_back(int32_i_0_reg2mem_0_54);
- ptr_arrayidx12_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx12 = new GetElementPtrInst(ptr_ainputs_49, ptr_arrayidx12_indices.begin(), ptr_arrayidx12_indices.end(), "arrayidx12", label_forbody);
- LoadInst* float_tmp13_58 = new LoadInst(ptr_arrayidx12, "tmp13", false, label_forbody);
- InsertElementInst* packed_tmp15_59 = new InsertElementInst(packed_tmp7, float_tmp13_58, const_int32_34, "tmp15", label_forbody);
+ ptr_arrayidx12_indices.push_back(int32_i_0_reg2mem_0_52);
+ ptr_arrayidx12_indices.push_back(const_int32_32);
+ Instruction* ptr_arrayidx12 = new GetElementPtrInst(ptr_ainputs_47, ptr_arrayidx12_indices.begin(), ptr_arrayidx12_indices.end(), "arrayidx12", label_forbody);
+ LoadInst* float_tmp13_56 = new LoadInst(ptr_arrayidx12, "tmp13", false, label_forbody);
+ InsertElementInst* packed_tmp15_57 = new InsertElementInst(packed_tmp7, float_tmp13_56, const_int32_32, "tmp15", label_forbody);
std::vector<Value*> ptr_arrayidx20_indices;
- ptr_arrayidx20_indices.push_back(int32_i_0_reg2mem_0_54);
- ptr_arrayidx20_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx20 = new GetElementPtrInst(ptr_ainputs_49, ptr_arrayidx20_indices.begin(), ptr_arrayidx20_indices.end(), "arrayidx20", label_forbody);
+ ptr_arrayidx20_indices.push_back(int32_i_0_reg2mem_0_52);
+ ptr_arrayidx20_indices.push_back(const_int32_33);
+ Instruction* ptr_arrayidx20 = new GetElementPtrInst(ptr_ainputs_47, ptr_arrayidx20_indices.begin(), ptr_arrayidx20_indices.end(), "arrayidx20", label_forbody);
LoadInst* float_tmp21 = new LoadInst(ptr_arrayidx20, "tmp21", false, label_forbody);
- InsertElementInst* packed_tmp23 = new InsertElementInst(packed_tmp15_59, float_tmp21, const_int32_35, "tmp23", label_forbody);
+ InsertElementInst* packed_tmp23 = new InsertElementInst(packed_tmp15_57, float_tmp21, const_int32_33, "tmp23", label_forbody);
std::vector<Value*> ptr_arrayidx28_indices;
- ptr_arrayidx28_indices.push_back(int32_i_0_reg2mem_0_54);
- ptr_arrayidx28_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx28 = new GetElementPtrInst(ptr_ainputs_49, ptr_arrayidx28_indices.begin(), ptr_arrayidx28_indices.end(), "arrayidx28", label_forbody);
+ ptr_arrayidx28_indices.push_back(int32_i_0_reg2mem_0_52);
+ ptr_arrayidx28_indices.push_back(const_int32_34);
+ Instruction* ptr_arrayidx28 = new GetElementPtrInst(ptr_ainputs_47, ptr_arrayidx28_indices.begin(), ptr_arrayidx28_indices.end(), "arrayidx28", label_forbody);
LoadInst* float_tmp29 = new LoadInst(ptr_arrayidx28, "tmp29", false, label_forbody);
- InsertElementInst* packed_tmp31 = new InsertElementInst(packed_tmp23, float_tmp29, const_int32_36, "tmp31", label_forbody);
- GetElementPtrInst* ptr_arrayidx34_60 = new GetElementPtrInst(ptr_res_48, int32_i_0_reg2mem_0_54, "arrayidx34", label_forbody);
- StoreInst* void_61 = new StoreInst(packed_tmp31, ptr_arrayidx34_60, false, label_forbody);
- BinaryOperator* int32_indvar_next = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_54, const_int32_34, "indvar.next", label_forbody);
- ICmpInst* int1_exitcond = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next, int32_count_50, "exitcond", label_forbody);
+ InsertElementInst* packed_tmp31 = new InsertElementInst(packed_tmp23, float_tmp29, const_int32_34, "tmp31", label_forbody);
+ GetElementPtrInst* ptr_arrayidx34_58 = new GetElementPtrInst(ptr_res_46, int32_i_0_reg2mem_0_52, "arrayidx34", label_forbody);
+ StoreInst* void_59 = new StoreInst(packed_tmp31, ptr_arrayidx34_58, false, label_forbody);
+ BinaryOperator* int32_indvar_next = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_52, const_int32_32, "indvar.next", label_forbody);
+ ICmpInst* int1_exitcond = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next, int32_count_48, "exitcond", label_forbody);
new BranchInst(label_afterfor, label_forbody, int1_exitcond, label_forbody);
// Block afterfor (label_afterfor)
new ReturnInst(label_afterfor);
// Resolve Forward References
- fwdref_57->replaceAllUsesWith(packed_tmp31); delete fwdref_57;
- fwdref_55->replaceAllUsesWith(int32_indvar_next); delete fwdref_55;
+ fwdref_55->replaceAllUsesWith(packed_tmp31); delete fwdref_55;
+ fwdref_53->replaceAllUsesWith(int32_indvar_next); delete fwdref_53;
}
@@ -401,353 +405,204 @@ Module* createBaseShader() {
ptr_dests->setName("dests");
Value* ptr_in = args++;
ptr_in->setName("in");
- Value* int32_num_attribs_64 = args++;
- int32_num_attribs_64->setName("num_attribs");
-
- BasicBlock* label_entry_65 = new BasicBlock("entry",func_to_array,0);
- BasicBlock* label_forbody_66 = new BasicBlock("forbody",func_to_array,0);
- BasicBlock* label_afterfor_67 = new BasicBlock("afterfor",func_to_array,0);
-
- // Block entry (label_entry_65)
- ICmpInst* int1_cmp_68 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_attribs_64, const_int32_32, "cmp", label_entry_65);
- new BranchInst(label_forbody_66, label_afterfor_67, int1_cmp_68, label_entry_65);
-
- // Block forbody (label_forbody_66)
- Argument* fwdref_71 = new Argument(IntegerType::get(32));
- PHINode* int32_i_0_reg2mem_0_70 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody_66);
- int32_i_0_reg2mem_0_70->reserveOperandSpace(2);
- int32_i_0_reg2mem_0_70->addIncoming(const_int32_32, label_entry_65);
- int32_i_0_reg2mem_0_70->addIncoming(fwdref_71, label_forbody_66);
-
- std::vector<Value*> ptr_arraydecay_72_indices;
- ptr_arraydecay_72_indices.push_back(int32_i_0_reg2mem_0_70);
- ptr_arraydecay_72_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay_72 = new GetElementPtrInst(ptr_dests, ptr_arraydecay_72_indices.begin(), ptr_arraydecay_72_indices.end(), "arraydecay", label_forbody_66);
- GetElementPtrInst* ptr_arrayidx6 = new GetElementPtrInst(ptr_in, int32_i_0_reg2mem_0_70, "arrayidx6", label_forbody_66);
- LoadInst* packed_tmp7_73 = new LoadInst(ptr_arrayidx6, "tmp7", false, label_forbody_66);
- ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp7_73, const_int32_32, "tmp11", label_forbody_66);
- StoreInst* void_74 = new StoreInst(float_tmp11, ptr_arraydecay_72, false, label_forbody_66);
+ Value* int32_num_attribs_62 = args++;
+ int32_num_attribs_62->setName("num_attribs");
+
+ BasicBlock* label_entry_63 = new BasicBlock("entry",func_to_array,0);
+ BasicBlock* label_forbody_64 = new BasicBlock("forbody",func_to_array,0);
+ BasicBlock* label_afterfor_65 = new BasicBlock("afterfor",func_to_array,0);
+
+ // Block entry (label_entry_63)
+ ICmpInst* int1_cmp_66 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_attribs_62, const_int32_30, "cmp", label_entry_63);
+ new BranchInst(label_forbody_64, label_afterfor_65, int1_cmp_66, label_entry_63);
+
+ // Block forbody (label_forbody_64)
+ Argument* fwdref_69 = new Argument(IntegerType::get(32));
+ PHINode* int32_i_0_reg2mem_0_68 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody_64);
+ int32_i_0_reg2mem_0_68->reserveOperandSpace(2);
+ int32_i_0_reg2mem_0_68->addIncoming(const_int32_30, label_entry_63);
+ int32_i_0_reg2mem_0_68->addIncoming(fwdref_69, label_forbody_64);
+
+ std::vector<Value*> ptr_arraydecay_70_indices;
+ ptr_arraydecay_70_indices.push_back(int32_i_0_reg2mem_0_68);
+ ptr_arraydecay_70_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay_70 = new GetElementPtrInst(ptr_dests, ptr_arraydecay_70_indices.begin(), ptr_arraydecay_70_indices.end(), "arraydecay", label_forbody_64);
+ GetElementPtrInst* ptr_arrayidx6 = new GetElementPtrInst(ptr_in, int32_i_0_reg2mem_0_68, "arrayidx6", label_forbody_64);
+ LoadInst* packed_tmp7_71 = new LoadInst(ptr_arrayidx6, "tmp7", false, label_forbody_64);
+ ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp7_71, const_int32_30, "tmp11", label_forbody_64);
+ StoreInst* void_72 = new StoreInst(float_tmp11, ptr_arraydecay_70, false, label_forbody_64);
std::vector<Value*> ptr_arrayidx13_indices;
- ptr_arrayidx13_indices.push_back(int32_i_0_reg2mem_0_70);
- ptr_arrayidx13_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx13 = new GetElementPtrInst(ptr_dests, ptr_arrayidx13_indices.begin(), ptr_arrayidx13_indices.end(), "arrayidx13", label_forbody_66);
- ExtractElementInst* float_tmp15 = new ExtractElementInst(packed_tmp7_73, const_int32_34, "tmp15", label_forbody_66);
- StoreInst* void_75 = new StoreInst(float_tmp15, ptr_arrayidx13, false, label_forbody_66);
+ ptr_arrayidx13_indices.push_back(int32_i_0_reg2mem_0_68);
+ ptr_arrayidx13_indices.push_back(const_int32_32);
+ Instruction* ptr_arrayidx13 = new GetElementPtrInst(ptr_dests, ptr_arrayidx13_indices.begin(), ptr_arrayidx13_indices.end(), "arrayidx13", label_forbody_64);
+ ExtractElementInst* float_tmp15 = new ExtractElementInst(packed_tmp7_71, const_int32_32, "tmp15", label_forbody_64);
+ StoreInst* void_73 = new StoreInst(float_tmp15, ptr_arrayidx13, false, label_forbody_64);
std::vector<Value*> ptr_arrayidx17_indices;
- ptr_arrayidx17_indices.push_back(int32_i_0_reg2mem_0_70);
- ptr_arrayidx17_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx17 = new GetElementPtrInst(ptr_dests, ptr_arrayidx17_indices.begin(), ptr_arrayidx17_indices.end(), "arrayidx17", label_forbody_66);
- ExtractElementInst* float_tmp19 = new ExtractElementInst(packed_tmp7_73, const_int32_35, "tmp19", label_forbody_66);
- StoreInst* void_76 = new StoreInst(float_tmp19, ptr_arrayidx17, false, label_forbody_66);
+ ptr_arrayidx17_indices.push_back(int32_i_0_reg2mem_0_68);
+ ptr_arrayidx17_indices.push_back(const_int32_33);
+ Instruction* ptr_arrayidx17 = new GetElementPtrInst(ptr_dests, ptr_arrayidx17_indices.begin(), ptr_arrayidx17_indices.end(), "arrayidx17", label_forbody_64);
+ ExtractElementInst* float_tmp19 = new ExtractElementInst(packed_tmp7_71, const_int32_33, "tmp19", label_forbody_64);
+ StoreInst* void_74 = new StoreInst(float_tmp19, ptr_arrayidx17, false, label_forbody_64);
std::vector<Value*> ptr_arrayidx21_indices;
- ptr_arrayidx21_indices.push_back(int32_i_0_reg2mem_0_70);
- ptr_arrayidx21_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx21 = new GetElementPtrInst(ptr_dests, ptr_arrayidx21_indices.begin(), ptr_arrayidx21_indices.end(), "arrayidx21", label_forbody_66);
- ExtractElementInst* float_tmp23 = new ExtractElementInst(packed_tmp7_73, const_int32_36, "tmp23", label_forbody_66);
- StoreInst* void_77 = new StoreInst(float_tmp23, ptr_arrayidx21, false, label_forbody_66);
- BinaryOperator* int32_indvar_next_78 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_70, const_int32_34, "indvar.next", label_forbody_66);
- ICmpInst* int1_exitcond_79 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next_78, int32_num_attribs_64, "exitcond", label_forbody_66);
- new BranchInst(label_afterfor_67, label_forbody_66, int1_exitcond_79, label_forbody_66);
-
- // Block afterfor (label_afterfor_67)
- new ReturnInst(label_afterfor_67);
+ ptr_arrayidx21_indices.push_back(int32_i_0_reg2mem_0_68);
+ ptr_arrayidx21_indices.push_back(const_int32_34);
+ Instruction* ptr_arrayidx21 = new GetElementPtrInst(ptr_dests, ptr_arrayidx21_indices.begin(), ptr_arrayidx21_indices.end(), "arrayidx21", label_forbody_64);
+ ExtractElementInst* float_tmp23 = new ExtractElementInst(packed_tmp7_71, const_int32_34, "tmp23", label_forbody_64);
+ StoreInst* void_75 = new StoreInst(float_tmp23, ptr_arrayidx21, false, label_forbody_64);
+ BinaryOperator* int32_indvar_next_76 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_68, const_int32_32, "indvar.next", label_forbody_64);
+ ICmpInst* int1_exitcond_77 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next_76, int32_num_attribs_62, "exitcond", label_forbody_64);
+ new BranchInst(label_afterfor_65, label_forbody_64, int1_exitcond_77, label_forbody_64);
+
+ // Block afterfor (label_afterfor_65)
+ new ReturnInst(label_afterfor_65);
// Resolve Forward References
- fwdref_71->replaceAllUsesWith(int32_indvar_next_78); delete fwdref_71;
+ fwdref_69->replaceAllUsesWith(int32_indvar_next_76); delete fwdref_69;
}
// Function: run_vertex_shader (func_run_vertex_shader)
{
Function::arg_iterator args = func_run_vertex_shader->arg_begin();
- Value* ptr_ainputs_82 = args++;
- ptr_ainputs_82->setName("ainputs");
- Value* ptr_dests_83 = args++;
- ptr_dests_83->setName("dests");
+ Value* ptr_inputs = args++;
+ ptr_inputs->setName("inputs");
+ Value* ptr_results = args++;
+ ptr_results->setName("results");
Value* ptr_aconsts = args++;
ptr_aconsts->setName("aconsts");
Value* int32_num_vertices = args++;
int32_num_vertices->setName("num_vertices");
Value* int32_num_inputs = args++;
int32_num_inputs->setName("num_inputs");
- Value* int32_num_attribs_84 = args++;
- int32_num_attribs_84->setName("num_attribs");
+ Value* int32_num_attribs_80 = args++;
+ int32_num_attribs_80->setName("num_attribs");
Value* int32_num_consts = args++;
int32_num_consts->setName("num_consts");
- BasicBlock* label_entry_85 = new BasicBlock("entry",func_run_vertex_shader,0);
- BasicBlock* label_forbody6_i = new BasicBlock("forbody6.i",func_run_vertex_shader,0);
- BasicBlock* label_forinc57_i = new BasicBlock("forinc57.i",func_run_vertex_shader,0);
- BasicBlock* label_from_array_exit = new BasicBlock("from_array.exit",func_run_vertex_shader,0);
- BasicBlock* label_forbody_i15 = new BasicBlock("forbody.i15",func_run_vertex_shader,0);
- BasicBlock* label_forcond = new BasicBlock("forcond",func_run_vertex_shader,0);
+ BasicBlock* label_entry_81 = new BasicBlock("entry",func_run_vertex_shader,0);
+ BasicBlock* label_forbody_i = new BasicBlock("forbody.i",func_run_vertex_shader,0);
+ BasicBlock* label_from_consts_exit = new BasicBlock("from_consts.exit",func_run_vertex_shader,0);
BasicBlock* label_forbody_preheader = new BasicBlock("forbody.preheader",func_run_vertex_shader,0);
- BasicBlock* label_forbody_us = new BasicBlock("forbody.us",func_run_vertex_shader,0);
- BasicBlock* label_to_array_exit_us = new BasicBlock("to_array.exit.us",func_run_vertex_shader,0);
- BasicBlock* label_forbody_i_us = new BasicBlock("forbody.i.us",func_run_vertex_shader,0);
- BasicBlock* label_forbody_86 = new BasicBlock("forbody",func_run_vertex_shader,0);
- BasicBlock* label_afterfor_87 = new BasicBlock("afterfor",func_run_vertex_shader,0);
-
- // Block entry (label_entry_85)
- AllocaInst* ptr_inputs = new AllocaInst(ArrayTy_21, "inputs", label_entry_85);
- AllocaInst* ptr_consts = new AllocaInst(ArrayTy_23, "consts", label_entry_85);
- AllocaInst* ptr_results = new AllocaInst(ArrayTy_21, "results", label_entry_85);
- AllocaInst* ptr_temps = new AllocaInst(ArrayTy_25, "temps", label_entry_85);
- ICmpInst* int1_cmp_i = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_vertices, const_int32_32, "cmp.i", label_entry_85);
- ICmpInst* int1_cmp5_i = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_inputs, const_int32_32, "cmp5.i", label_entry_85);
- BinaryOperator* int1_bothcond_i = BinaryOperator::create(Instruction::And, int1_cmp5_i, int1_cmp_i, "bothcond.i", label_entry_85);
- new BranchInst(label_forbody6_i, label_from_array_exit, int1_bothcond_i, label_entry_85);
-
- // Block forbody6.i (label_forbody6_i)
- Argument* fwdref_89 = new Argument(IntegerType::get(32));
- Argument* fwdref_90 = new Argument(IntegerType::get(32));
- PHINode* int32_i_0_reg2mem_0_i_ph = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i.ph", label_forbody6_i);
- int32_i_0_reg2mem_0_i_ph->reserveOperandSpace(3);
- int32_i_0_reg2mem_0_i_ph->addIncoming(const_int32_32, label_entry_85);
- int32_i_0_reg2mem_0_i_ph->addIncoming(fwdref_89, label_forinc57_i);
- int32_i_0_reg2mem_0_i_ph->addIncoming(fwdref_90, label_forbody6_i);
-
- Argument* fwdref_91 = new Argument(IntegerType::get(32));
- PHINode* int32_j_0_reg2mem_0_i = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i", label_forbody6_i);
- int32_j_0_reg2mem_0_i->reserveOperandSpace(3);
- int32_j_0_reg2mem_0_i->addIncoming(fwdref_91, label_forbody6_i);
- int32_j_0_reg2mem_0_i->addIncoming(const_int32_32, label_forinc57_i);
- int32_j_0_reg2mem_0_i->addIncoming(const_int32_32, label_entry_85);
-
- Argument* fwdref_92 = new Argument(VectorTy_13);
- PHINode* packed_vec_0_reg2mem_0_i = new PHINode(VectorTy_13, "vec.0.reg2mem.0.i", label_forbody6_i);
- packed_vec_0_reg2mem_0_i->reserveOperandSpace(3);
- packed_vec_0_reg2mem_0_i->addIncoming(fwdref_92, label_forbody6_i);
- packed_vec_0_reg2mem_0_i->addIncoming(const_packed_33, label_entry_85);
- packed_vec_0_reg2mem_0_i->addIncoming(fwdref_92, label_forinc57_i);
-
- std::vector<Value*> ptr_arraydecay11_i_indices;
- ptr_arraydecay11_i_indices.push_back(int32_i_0_reg2mem_0_i_ph);
- ptr_arraydecay11_i_indices.push_back(int32_j_0_reg2mem_0_i);
- ptr_arraydecay11_i_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay11_i = new GetElementPtrInst(ptr_ainputs_82, ptr_arraydecay11_i_indices.begin(), ptr_arraydecay11_i_indices.end(), "arraydecay11.i", label_forbody6_i);
- LoadInst* float_tmp13_i = new LoadInst(ptr_arraydecay11_i, "tmp13.i", false, label_forbody6_i);
- InsertElementInst* packed_tmp15_i = new InsertElementInst(packed_vec_0_reg2mem_0_i, float_tmp13_i, const_int32_32, "tmp15.i", label_forbody6_i);
- std::vector<Value*> ptr_arrayidx23_i_indices;
- ptr_arrayidx23_i_indices.push_back(int32_i_0_reg2mem_0_i_ph);
- ptr_arrayidx23_i_indices.push_back(int32_j_0_reg2mem_0_i);
- ptr_arrayidx23_i_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx23_i = new GetElementPtrInst(ptr_ainputs_82, ptr_arrayidx23_i_indices.begin(), ptr_arrayidx23_i_indices.end(), "arrayidx23.i", label_forbody6_i);
- LoadInst* float_tmp24_i = new LoadInst(ptr_arrayidx23_i, "tmp24.i", false, label_forbody6_i);
- InsertElementInst* packed_tmp26_i = new InsertElementInst(packed_tmp15_i, float_tmp24_i, const_int32_34, "tmp26.i", label_forbody6_i);
- std::vector<Value*> ptr_arrayidx34_i_indices;
- ptr_arrayidx34_i_indices.push_back(int32_i_0_reg2mem_0_i_ph);
- ptr_arrayidx34_i_indices.push_back(int32_j_0_reg2mem_0_i);
- ptr_arrayidx34_i_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx34_i = new GetElementPtrInst(ptr_ainputs_82, ptr_arrayidx34_i_indices.begin(), ptr_arrayidx34_i_indices.end(), "arrayidx34.i", label_forbody6_i);
- LoadInst* float_tmp35_i = new LoadInst(ptr_arrayidx34_i, "tmp35.i", false, label_forbody6_i);
- InsertElementInst* packed_tmp37_i = new InsertElementInst(packed_tmp26_i, float_tmp35_i, const_int32_35, "tmp37.i", label_forbody6_i);
- std::vector<Value*> ptr_arrayidx45_i_indices;
- ptr_arrayidx45_i_indices.push_back(int32_i_0_reg2mem_0_i_ph);
- ptr_arrayidx45_i_indices.push_back(int32_j_0_reg2mem_0_i);
- ptr_arrayidx45_i_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx45_i = new GetElementPtrInst(ptr_ainputs_82, ptr_arrayidx45_i_indices.begin(), ptr_arrayidx45_i_indices.end(), "arrayidx45.i", label_forbody6_i);
- LoadInst* float_tmp46_i = new LoadInst(ptr_arrayidx45_i, "tmp46.i", false, label_forbody6_i);
- InsertElementInst* packed_tmp48_i = new InsertElementInst(packed_tmp37_i, float_tmp46_i, const_int32_36, "tmp48.i", label_forbody6_i);
- std::vector<Value*> ptr_arrayidx54_i_indices;
- ptr_arrayidx54_i_indices.push_back(const_int32_32);
- ptr_arrayidx54_i_indices.push_back(int32_i_0_reg2mem_0_i_ph);
- ptr_arrayidx54_i_indices.push_back(int32_j_0_reg2mem_0_i);
- Instruction* ptr_arrayidx54_i = new GetElementPtrInst(ptr_inputs, ptr_arrayidx54_i_indices.begin(), ptr_arrayidx54_i_indices.end(), "arrayidx54.i", label_forbody6_i);
- StoreInst* void_93 = new StoreInst(packed_tmp48_i, ptr_arrayidx54_i, false, label_forbody6_i);
- BinaryOperator* int32_inc_i = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i, const_int32_34, "inc.i", label_forbody6_i);
- ICmpInst* int1_cmp59_i = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i, int32_num_inputs, "cmp59.i", label_forbody6_i);
- new BranchInst(label_forbody6_i, label_forinc57_i, int1_cmp59_i, label_forbody6_i);
-
- // Block forinc57.i (label_forinc57_i)
- BinaryOperator* int32_inc59_i = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i_ph, const_int32_34, "inc59.i", label_forinc57_i);
- ICmpInst* int1_cmp17_i = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc59_i, int32_num_vertices, "cmp17.i", label_forinc57_i);
- new BranchInst(label_forbody6_i, label_from_array_exit, int1_cmp17_i, label_forinc57_i);
-
- // Block from_array.exit (label_from_array_exit)
- ICmpInst* int1_cmp_i4 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts, const_int32_32, "cmp.i4", label_from_array_exit);
- new BranchInst(label_forbody_i15, label_forcond, int1_cmp_i4, label_from_array_exit);
-
- // Block forbody.i15 (label_forbody_i15)
- Argument* fwdref_97 = new Argument(IntegerType::get(32));
- PHINode* int32_i_0_reg2mem_0_i5 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i5", label_forbody_i15);
- int32_i_0_reg2mem_0_i5->reserveOperandSpace(2);
- int32_i_0_reg2mem_0_i5->addIncoming(const_int32_32, label_from_array_exit);
- int32_i_0_reg2mem_0_i5->addIncoming(fwdref_97, label_forbody_i15);
+ BasicBlock* label_forbody_82 = new BasicBlock("forbody",func_run_vertex_shader,0);
+ BasicBlock* label_afterfor_83 = new BasicBlock("afterfor",func_run_vertex_shader,0);
+
+ // Block entry (label_entry_81)
+ AllocaInst* ptr_consts = new AllocaInst(ArrayTy_21, "consts", label_entry_81);
+ AllocaInst* ptr_temps = new AllocaInst(ArrayTy_23, "temps", label_entry_81);
+ AllocaInst* ptr_args = new AllocaInst(StructTy_struct_ShaderInput, "args", label_entry_81);
+ ICmpInst* int1_cmp_i = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts, const_int32_30, "cmp.i", label_entry_81);
+ new BranchInst(label_forbody_i, label_from_consts_exit, int1_cmp_i, label_entry_81);
+
+ // Block forbody.i (label_forbody_i)
+ Argument* fwdref_85 = new Argument(IntegerType::get(32));
+ PHINode* int32_i_0_reg2mem_0_i = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i", label_forbody_i);
+ int32_i_0_reg2mem_0_i->reserveOperandSpace(2);
+ int32_i_0_reg2mem_0_i->addIncoming(const_int32_30, label_entry_81);
+ int32_i_0_reg2mem_0_i->addIncoming(fwdref_85, label_forbody_i);
+
+ Argument* fwdref_86 = new Argument(VectorTy_1);
+ PHINode* packed_vec_0_reg2mem_0_i = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody_i);
+ packed_vec_0_reg2mem_0_i->reserveOperandSpace(2);
+ packed_vec_0_reg2mem_0_i->addIncoming(const_packed_31, label_entry_81);
+ packed_vec_0_reg2mem_0_i->addIncoming(fwdref_86, label_forbody_i);
- Argument* fwdref_98 = new Argument(VectorTy_13);
- PHINode* packed_vec_0_reg2mem_0_i6 = new PHINode(VectorTy_13, "vec.0.reg2mem.0.i6", label_forbody_i15);
- packed_vec_0_reg2mem_0_i6->reserveOperandSpace(2);
- packed_vec_0_reg2mem_0_i6->addIncoming(const_packed_33, label_from_array_exit);
- packed_vec_0_reg2mem_0_i6->addIncoming(fwdref_98, label_forbody_i15);
-
- std::vector<Value*> ptr_arraydecay_i7_indices;
- ptr_arraydecay_i7_indices.push_back(int32_i_0_reg2mem_0_i5);
- ptr_arraydecay_i7_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay_i7 = new GetElementPtrInst(ptr_aconsts, ptr_arraydecay_i7_indices.begin(), ptr_arraydecay_i7_indices.end(), "arraydecay.i7", label_forbody_i15);
- LoadInst* float_tmp5_i = new LoadInst(ptr_arraydecay_i7, "tmp5.i", false, label_forbody_i15);
- InsertElementInst* packed_tmp7_i8 = new InsertElementInst(packed_vec_0_reg2mem_0_i6, float_tmp5_i, const_int32_32, "tmp7.i8", label_forbody_i15);
+ std::vector<Value*> ptr_arraydecay_i_indices;
+ ptr_arraydecay_i_indices.push_back(int32_i_0_reg2mem_0_i);
+ ptr_arraydecay_i_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay_i = new GetElementPtrInst(ptr_aconsts, ptr_arraydecay_i_indices.begin(), ptr_arraydecay_i_indices.end(), "arraydecay.i", label_forbody_i);
+ LoadInst* float_tmp5_i = new LoadInst(ptr_arraydecay_i, "tmp5.i", false, label_forbody_i);
+ InsertElementInst* packed_tmp7_i = new InsertElementInst(packed_vec_0_reg2mem_0_i, float_tmp5_i, const_int32_30, "tmp7.i", label_forbody_i);
std::vector<Value*> ptr_arrayidx12_i_indices;
- ptr_arrayidx12_i_indices.push_back(int32_i_0_reg2mem_0_i5);
- ptr_arrayidx12_i_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx12_i = new GetElementPtrInst(ptr_aconsts, ptr_arrayidx12_i_indices.begin(), ptr_arrayidx12_i_indices.end(), "arrayidx12.i", label_forbody_i15);
- LoadInst* float_tmp13_i9 = new LoadInst(ptr_arrayidx12_i, "tmp13.i9", false, label_forbody_i15);
- InsertElementInst* packed_tmp15_i10 = new InsertElementInst(packed_tmp7_i8, float_tmp13_i9, const_int32_34, "tmp15.i10", label_forbody_i15);
+ ptr_arrayidx12_i_indices.push_back(int32_i_0_reg2mem_0_i);
+ ptr_arrayidx12_i_indices.push_back(const_int32_32);
+ Instruction* ptr_arrayidx12_i = new GetElementPtrInst(ptr_aconsts, ptr_arrayidx12_i_indices.begin(), ptr_arrayidx12_i_indices.end(), "arrayidx12.i", label_forbody_i);
+ LoadInst* float_tmp13_i = new LoadInst(ptr_arrayidx12_i, "tmp13.i", false, label_forbody_i);
+ InsertElementInst* packed_tmp15_i = new InsertElementInst(packed_tmp7_i, float_tmp13_i, const_int32_32, "tmp15.i", label_forbody_i);
std::vector<Value*> ptr_arrayidx20_i_indices;
- ptr_arrayidx20_i_indices.push_back(int32_i_0_reg2mem_0_i5);
- ptr_arrayidx20_i_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx20_i = new GetElementPtrInst(ptr_aconsts, ptr_arrayidx20_i_indices.begin(), ptr_arrayidx20_i_indices.end(), "arrayidx20.i", label_forbody_i15);
- LoadInst* float_tmp21_i = new LoadInst(ptr_arrayidx20_i, "tmp21.i", false, label_forbody_i15);
- InsertElementInst* packed_tmp23_i11 = new InsertElementInst(packed_tmp15_i10, float_tmp21_i, const_int32_35, "tmp23.i11", label_forbody_i15);
+ ptr_arrayidx20_i_indices.push_back(int32_i_0_reg2mem_0_i);
+ ptr_arrayidx20_i_indices.push_back(const_int32_33);
+ Instruction* ptr_arrayidx20_i = new GetElementPtrInst(ptr_aconsts, ptr_arrayidx20_i_indices.begin(), ptr_arrayidx20_i_indices.end(), "arrayidx20.i", label_forbody_i);
+ LoadInst* float_tmp21_i = new LoadInst(ptr_arrayidx20_i, "tmp21.i", false, label_forbody_i);
+ InsertElementInst* packed_tmp23_i = new InsertElementInst(packed_tmp15_i, float_tmp21_i, const_int32_33, "tmp23.i", label_forbody_i);
std::vector<Value*> ptr_arrayidx28_i_indices;
- ptr_arrayidx28_i_indices.push_back(int32_i_0_reg2mem_0_i5);
- ptr_arrayidx28_i_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx28_i = new GetElementPtrInst(ptr_aconsts, ptr_arrayidx28_i_indices.begin(), ptr_arrayidx28_i_indices.end(), "arrayidx28.i", label_forbody_i15);
- LoadInst* float_tmp29_i = new LoadInst(ptr_arrayidx28_i, "tmp29.i", false, label_forbody_i15);
- InsertElementInst* packed_tmp31_i = new InsertElementInst(packed_tmp23_i11, float_tmp29_i, const_int32_36, "tmp31.i", label_forbody_i15);
- std::vector<Value*> ptr_arrayidx34_i12_indices;
- ptr_arrayidx34_i12_indices.push_back(const_int32_32);
- ptr_arrayidx34_i12_indices.push_back(int32_i_0_reg2mem_0_i5);
- Instruction* ptr_arrayidx34_i12 = new GetElementPtrInst(ptr_consts, ptr_arrayidx34_i12_indices.begin(), ptr_arrayidx34_i12_indices.end(), "arrayidx34.i12", label_forbody_i15);
- StoreInst* void_99 = new StoreInst(packed_tmp31_i, ptr_arrayidx34_i12, false, label_forbody_i15);
- BinaryOperator* int32_indvar_next24 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i5, const_int32_34, "indvar.next24", label_forbody_i15);
- ICmpInst* int1_exitcond25 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next24, int32_num_consts, "exitcond25", label_forbody_i15);
- new BranchInst(label_forcond, label_forbody_i15, int1_exitcond25, label_forbody_i15);
-
- // Block forcond (label_forcond)
- new BranchInst(label_forbody_preheader, label_afterfor_87, int1_cmp_i, label_forcond);
+ ptr_arrayidx28_i_indices.push_back(int32_i_0_reg2mem_0_i);
+ ptr_arrayidx28_i_indices.push_back(const_int32_34);
+ Instruction* ptr_arrayidx28_i = new GetElementPtrInst(ptr_aconsts, ptr_arrayidx28_i_indices.begin(), ptr_arrayidx28_i_indices.end(), "arrayidx28.i", label_forbody_i);
+ LoadInst* float_tmp29_i = new LoadInst(ptr_arrayidx28_i, "tmp29.i", false, label_forbody_i);
+ InsertElementInst* packed_tmp31_i = new InsertElementInst(packed_tmp23_i, float_tmp29_i, const_int32_34, "tmp31.i", label_forbody_i);
+ std::vector<Value*> ptr_arrayidx34_i_indices;
+ ptr_arrayidx34_i_indices.push_back(const_int32_30);
+ ptr_arrayidx34_i_indices.push_back(int32_i_0_reg2mem_0_i);
+ Instruction* ptr_arrayidx34_i = new GetElementPtrInst(ptr_consts, ptr_arrayidx34_i_indices.begin(), ptr_arrayidx34_i_indices.end(), "arrayidx34.i", label_forbody_i);
+ StoreInst* void_87 = new StoreInst(packed_tmp31_i, ptr_arrayidx34_i, false, label_forbody_i);
+ BinaryOperator* int32_indvar_next6 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i, const_int32_32, "indvar.next6", label_forbody_i);
+ ICmpInst* int1_exitcond7 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next6, int32_num_consts, "exitcond7", label_forbody_i);
+ new BranchInst(label_from_consts_exit, label_forbody_i, int1_exitcond7, label_forbody_i);
+
+ // Block from_consts.exit (label_from_consts_exit)
+ std::vector<Value*> ptr_tmp2_indices;
+ ptr_tmp2_indices.push_back(const_int32_30);
+ ptr_tmp2_indices.push_back(const_int32_34);
+ Instruction* ptr_tmp2 = new GetElementPtrInst(ptr_args, ptr_tmp2_indices.begin(), ptr_tmp2_indices.end(), "tmp2", label_from_consts_exit);
+ std::vector<Value*> ptr_arraydecay3_indices;
+ ptr_arraydecay3_indices.push_back(const_int32_30);
+ ptr_arraydecay3_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay3 = new GetElementPtrInst(ptr_consts, ptr_arraydecay3_indices.begin(), ptr_arraydecay3_indices.end(), "arraydecay3", label_from_consts_exit);
+ StoreInst* void_89 = new StoreInst(ptr_arraydecay3, ptr_tmp2, false, label_from_consts_exit);
+ std::vector<Value*> ptr_tmp4_indices;
+ ptr_tmp4_indices.push_back(const_int32_30);
+ ptr_tmp4_indices.push_back(const_int32_33);
+ Instruction* ptr_tmp4 = new GetElementPtrInst(ptr_args, ptr_tmp4_indices.begin(), ptr_tmp4_indices.end(), "tmp4", label_from_consts_exit);
+ std::vector<Value*> ptr_arraydecay5_indices;
+ ptr_arraydecay5_indices.push_back(const_int32_30);
+ ptr_arraydecay5_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay5 = new GetElementPtrInst(ptr_temps, ptr_arraydecay5_indices.begin(), ptr_arraydecay5_indices.end(), "arraydecay5", label_from_consts_exit);
+ StoreInst* void_90 = new StoreInst(ptr_arraydecay5, ptr_tmp4, false, label_from_consts_exit);
+ ICmpInst* int1_cmp_91 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_vertices, const_int32_30, "cmp", label_from_consts_exit);
+ new BranchInst(label_forbody_preheader, label_afterfor_83, int1_cmp_91, label_from_consts_exit);
// Block forbody.preheader (label_forbody_preheader)
- std::vector<Value*> ptr_arraydecay17_indices;
- ptr_arraydecay17_indices.push_back(const_int32_32);
- ptr_arraydecay17_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay17 = new GetElementPtrInst(ptr_consts, ptr_arraydecay17_indices.begin(), ptr_arraydecay17_indices.end(), "arraydecay17", label_forbody_preheader);
- std::vector<Value*> ptr_arraydecay18_indices;
- ptr_arraydecay18_indices.push_back(const_int32_32);
- ptr_arraydecay18_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay18 = new GetElementPtrInst(ptr_temps, ptr_arraydecay18_indices.begin(), ptr_arraydecay18_indices.end(), "arraydecay18", label_forbody_preheader);
- ICmpInst* int1_cmp_i1 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_attribs_84, const_int32_32, "cmp.i1", label_forbody_preheader);
- new BranchInst(label_forbody_us, label_forbody_86, int1_cmp_i1, label_forbody_preheader);
-
- // Block forbody.us (label_forbody_us)
- Argument* fwdref_103 = new Argument(IntegerType::get(32));
- PHINode* int32_i_0_reg2mem_0_us = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.us", label_forbody_us);
- int32_i_0_reg2mem_0_us->reserveOperandSpace(2);
- int32_i_0_reg2mem_0_us->addIncoming(const_int32_32, label_forbody_preheader);
- int32_i_0_reg2mem_0_us->addIncoming(fwdref_103, label_to_array_exit_us);
-
- std::vector<Value*> ptr_arraydecay10_us_indices;
- ptr_arraydecay10_us_indices.push_back(const_int32_32);
- ptr_arraydecay10_us_indices.push_back(int32_i_0_reg2mem_0_us);
- ptr_arraydecay10_us_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay10_us = new GetElementPtrInst(ptr_inputs, ptr_arraydecay10_us_indices.begin(), ptr_arraydecay10_us_indices.end(), "arraydecay10.us", label_forbody_us);
- std::vector<Value*> ptr_arraydecay14_us_indices;
- ptr_arraydecay14_us_indices.push_back(const_int32_32);
- ptr_arraydecay14_us_indices.push_back(int32_i_0_reg2mem_0_us);
- ptr_arraydecay14_us_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay14_us = new GetElementPtrInst(ptr_results, ptr_arraydecay14_us_indices.begin(), ptr_arraydecay14_us_indices.end(), "arraydecay14.us", label_forbody_us);
- std::vector<Value*> void_104_params;
- void_104_params.push_back(ptr_arraydecay14_us);
- void_104_params.push_back(ptr_arraydecay10_us);
- void_104_params.push_back(ptr_arraydecay17);
- void_104_params.push_back(ptr_arraydecay18);
- CallInst* void_104 = new CallInst(func_execute_shader, void_104_params.begin(), void_104_params.end(), "", label_forbody_us);
- void_104->setCallingConv(CallingConv::C);
- void_104->setTailCall(false);
- new BranchInst(label_forbody_i_us, label_forbody_us);
-
- // Block to_array.exit.us (label_to_array_exit_us)
- BinaryOperator* int32_inc_us = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_us, const_int32_34, "inc.us", label_to_array_exit_us);
- ICmpInst* int1_cmp21_us = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_us, int32_num_vertices, "cmp21.us", label_to_array_exit_us);
- new BranchInst(label_forbody_us, label_afterfor_87, int1_cmp21_us, label_to_array_exit_us);
-
- // Block forbody.i.us (label_forbody_i_us)
- Argument* fwdref_107 = new Argument(IntegerType::get(32));
- PHINode* int32_i_0_reg2mem_0_i2_us = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i2.us", label_forbody_i_us);
- int32_i_0_reg2mem_0_i2_us->reserveOperandSpace(2);
- int32_i_0_reg2mem_0_i2_us->addIncoming(const_int32_32, label_forbody_us);
- int32_i_0_reg2mem_0_i2_us->addIncoming(fwdref_107, label_forbody_i_us);
-
- std::vector<Value*> ptr_arraydecay_i_us_indices;
- ptr_arraydecay_i_us_indices.push_back(int32_i_0_reg2mem_0_us);
- ptr_arraydecay_i_us_indices.push_back(int32_i_0_reg2mem_0_i2_us);
- ptr_arraydecay_i_us_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay_i_us = new GetElementPtrInst(ptr_dests_83, ptr_arraydecay_i_us_indices.begin(), ptr_arraydecay_i_us_indices.end(), "arraydecay.i.us", label_forbody_i_us);
- std::vector<Value*> ptr_arrayidx6_i_us_indices;
- ptr_arrayidx6_i_us_indices.push_back(const_int32_32);
- ptr_arrayidx6_i_us_indices.push_back(int32_i_0_reg2mem_0_us);
- ptr_arrayidx6_i_us_indices.push_back(int32_i_0_reg2mem_0_i2_us);
- Instruction* ptr_arrayidx6_i_us = new GetElementPtrInst(ptr_results, ptr_arrayidx6_i_us_indices.begin(), ptr_arrayidx6_i_us_indices.end(), "arrayidx6.i.us", label_forbody_i_us);
- LoadInst* packed_tmp7_i_us = new LoadInst(ptr_arrayidx6_i_us, "tmp7.i.us", false, label_forbody_i_us);
- ExtractElementInst* float_tmp11_i_us = new ExtractElementInst(packed_tmp7_i_us, const_int32_32, "tmp11.i.us", label_forbody_i_us);
- StoreInst* void_108 = new StoreInst(float_tmp11_i_us, ptr_arraydecay_i_us, false, label_forbody_i_us);
- std::vector<Value*> ptr_arrayidx13_i_us_indices;
- ptr_arrayidx13_i_us_indices.push_back(int32_i_0_reg2mem_0_us);
- ptr_arrayidx13_i_us_indices.push_back(int32_i_0_reg2mem_0_i2_us);
- ptr_arrayidx13_i_us_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx13_i_us = new GetElementPtrInst(ptr_dests_83, ptr_arrayidx13_i_us_indices.begin(), ptr_arrayidx13_i_us_indices.end(), "arrayidx13.i.us", label_forbody_i_us);
- ExtractElementInst* float_tmp15_i3_us = new ExtractElementInst(packed_tmp7_i_us, const_int32_34, "tmp15.i3.us", label_forbody_i_us);
- StoreInst* void_109 = new StoreInst(float_tmp15_i3_us, ptr_arrayidx13_i_us, false, label_forbody_i_us);
- std::vector<Value*> ptr_arrayidx17_i_us_indices;
- ptr_arrayidx17_i_us_indices.push_back(int32_i_0_reg2mem_0_us);
- ptr_arrayidx17_i_us_indices.push_back(int32_i_0_reg2mem_0_i2_us);
- ptr_arrayidx17_i_us_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx17_i_us = new GetElementPtrInst(ptr_dests_83, ptr_arrayidx17_i_us_indices.begin(), ptr_arrayidx17_i_us_indices.end(), "arrayidx17.i.us", label_forbody_i_us);
- ExtractElementInst* float_tmp19_i_us = new ExtractElementInst(packed_tmp7_i_us, const_int32_35, "tmp19.i.us", label_forbody_i_us);
- StoreInst* void_110 = new StoreInst(float_tmp19_i_us, ptr_arrayidx17_i_us, false, label_forbody_i_us);
- std::vector<Value*> ptr_arrayidx21_i_us_indices;
- ptr_arrayidx21_i_us_indices.push_back(int32_i_0_reg2mem_0_us);
- ptr_arrayidx21_i_us_indices.push_back(int32_i_0_reg2mem_0_i2_us);
- ptr_arrayidx21_i_us_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx21_i_us = new GetElementPtrInst(ptr_dests_83, ptr_arrayidx21_i_us_indices.begin(), ptr_arrayidx21_i_us_indices.end(), "arrayidx21.i.us", label_forbody_i_us);
- ExtractElementInst* float_tmp23_i_us = new ExtractElementInst(packed_tmp7_i_us, const_int32_36, "tmp23.i.us", label_forbody_i_us);
- StoreInst* void_111 = new StoreInst(float_tmp23_i_us, ptr_arrayidx21_i_us, false, label_forbody_i_us);
- BinaryOperator* int32_indvar_next_112 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i2_us, const_int32_34, "indvar.next", label_forbody_i_us);
- ICmpInst* int1_exitcond_113 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next_112, int32_num_attribs_84, "exitcond", label_forbody_i_us);
- new BranchInst(label_to_array_exit_us, label_forbody_i_us, int1_exitcond_113, label_forbody_i_us);
-
- // Block forbody (label_forbody_86)
- Argument* fwdref_116 = new Argument(IntegerType::get(32));
- PHINode* int32_i_0_reg2mem_0_115 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody_86);
- int32_i_0_reg2mem_0_115->reserveOperandSpace(2);
- int32_i_0_reg2mem_0_115->addIncoming(const_int32_32, label_forbody_preheader);
- int32_i_0_reg2mem_0_115->addIncoming(fwdref_116, label_forbody_86);
-
- std::vector<Value*> ptr_arraydecay10_indices;
- ptr_arraydecay10_indices.push_back(const_int32_32);
- ptr_arraydecay10_indices.push_back(int32_i_0_reg2mem_0_115);
- ptr_arraydecay10_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay10 = new GetElementPtrInst(ptr_inputs, ptr_arraydecay10_indices.begin(), ptr_arraydecay10_indices.end(), "arraydecay10", label_forbody_86);
- std::vector<Value*> ptr_arraydecay14_indices;
- ptr_arraydecay14_indices.push_back(const_int32_32);
- ptr_arraydecay14_indices.push_back(int32_i_0_reg2mem_0_115);
- ptr_arraydecay14_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay14 = new GetElementPtrInst(ptr_results, ptr_arraydecay14_indices.begin(), ptr_arraydecay14_indices.end(), "arraydecay14", label_forbody_86);
- std::vector<Value*> void_117_params;
- void_117_params.push_back(ptr_arraydecay14);
- void_117_params.push_back(ptr_arraydecay10);
- void_117_params.push_back(ptr_arraydecay17);
- void_117_params.push_back(ptr_arraydecay18);
- CallInst* void_117 = new CallInst(func_execute_shader, void_117_params.begin(), void_117_params.end(), "", label_forbody_86);
- void_117->setCallingConv(CallingConv::C);
- void_117->setTailCall(false);
- BinaryOperator* int32_inc_118 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_115, const_int32_34, "inc", label_forbody_86);
- ICmpInst* int1_cmp21 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_118, int32_num_vertices, "cmp21", label_forbody_86);
- new BranchInst(label_forbody_86, label_afterfor_87, int1_cmp21, label_forbody_86);
-
- // Block afterfor (label_afterfor_87)
- new ReturnInst(label_afterfor_87);
+ std::vector<Value*> ptr_tmp8_indices;
+ ptr_tmp8_indices.push_back(const_int32_30);
+ ptr_tmp8_indices.push_back(const_int32_30);
+ Instruction* ptr_tmp8 = new GetElementPtrInst(ptr_args, ptr_tmp8_indices.begin(), ptr_tmp8_indices.end(), "tmp8", label_forbody_preheader);
+ std::vector<Value*> ptr_tmp12_indices;
+ ptr_tmp12_indices.push_back(const_int32_30);
+ ptr_tmp12_indices.push_back(const_int32_32);
+ Instruction* ptr_tmp12 = new GetElementPtrInst(ptr_args, ptr_tmp12_indices.begin(), ptr_tmp12_indices.end(), "tmp12", label_forbody_preheader);
+ new BranchInst(label_forbody_82, label_forbody_preheader);
+
+ // Block forbody (label_forbody_82)
+ Argument* fwdref_95 = new Argument(IntegerType::get(32));
+ PHINode* int32_i_0_reg2mem_0_94 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody_82);
+ int32_i_0_reg2mem_0_94->reserveOperandSpace(2);
+ int32_i_0_reg2mem_0_94->addIncoming(const_int32_30, label_forbody_preheader);
+ int32_i_0_reg2mem_0_94->addIncoming(fwdref_95, label_forbody_82);
+
+ std::vector<Value*> ptr_arraydecay11_96_indices;
+ ptr_arraydecay11_96_indices.push_back(int32_i_0_reg2mem_0_94);
+ ptr_arraydecay11_96_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay11_96 = new GetElementPtrInst(ptr_results, ptr_arraydecay11_96_indices.begin(), ptr_arraydecay11_96_indices.end(), "arraydecay11", label_forbody_82);
+ StoreInst* void_97 = new StoreInst(ptr_arraydecay11_96, ptr_tmp8, false, label_forbody_82);
+ std::vector<Value*> ptr_arraydecay16_indices;
+ ptr_arraydecay16_indices.push_back(int32_i_0_reg2mem_0_94);
+ ptr_arraydecay16_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay16 = new GetElementPtrInst(ptr_inputs, ptr_arraydecay16_indices.begin(), ptr_arraydecay16_indices.end(), "arraydecay16", label_forbody_82);
+ StoreInst* void_98 = new StoreInst(ptr_arraydecay16, ptr_tmp12, false, label_forbody_82);
+ CallInst* void_99 = new CallInst(func_execute_shader, ptr_args, "", label_forbody_82);
+ void_99->setCallingConv(CallingConv::C);
+ void_99->setTailCall(false);
+ BinaryOperator* int32_indvar_next_100 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_94, const_int32_32, "indvar.next", label_forbody_82);
+ ICmpInst* int1_exitcond_101 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next_100, int32_num_vertices, "exitcond", label_forbody_82);
+ new BranchInst(label_afterfor_83, label_forbody_82, int1_exitcond_101, label_forbody_82);
+
+ // Block afterfor (label_afterfor_83)
+ new ReturnInst(label_afterfor_83);
// Resolve Forward References
- fwdref_90->replaceAllUsesWith(int32_i_0_reg2mem_0_i_ph); delete fwdref_90;
- fwdref_92->replaceAllUsesWith(packed_tmp48_i); delete fwdref_92;
- fwdref_91->replaceAllUsesWith(int32_inc_i); delete fwdref_91;
- fwdref_89->replaceAllUsesWith(int32_inc59_i); delete fwdref_89;
- fwdref_98->replaceAllUsesWith(packed_tmp31_i); delete fwdref_98;
- fwdref_97->replaceAllUsesWith(int32_indvar_next24); delete fwdref_97;
- fwdref_103->replaceAllUsesWith(int32_inc_us); delete fwdref_103;
- fwdref_107->replaceAllUsesWith(int32_indvar_next_112); delete fwdref_107;
- fwdref_116->replaceAllUsesWith(int32_inc_118); delete fwdref_116;
+ fwdref_86->replaceAllUsesWith(packed_tmp31_i); delete fwdref_86;
+ fwdref_85->replaceAllUsesWith(int32_indvar_next6); delete fwdref_85;
+ fwdref_95->replaceAllUsesWith(int32_indvar_next_100); delete fwdref_95;
}
@@ -758,412 +613,180 @@ Module* createBaseShader() {
float_x->setName("x");
Value* float_y = args++;
float_y->setName("y");
- Value* ptr_dests_121 = args++;
- ptr_dests_121->setName("dests");
- Value* ptr_ainputs_122 = args++;
- ptr_ainputs_122->setName("ainputs");
- Value* int32_num_inputs_123 = args++;
- int32_num_inputs_123->setName("num_inputs");
- Value* ptr_aconsts_124 = args++;
- ptr_aconsts_124->setName("aconsts");
- Value* int32_num_consts_125 = args++;
- int32_num_consts_125->setName("num_consts");
+ Value* ptr_results_104 = args++;
+ ptr_results_104->setName("results");
+ Value* ptr_inputs_105 = args++;
+ ptr_inputs_105->setName("inputs");
+ Value* int32_num_inputs_106 = args++;
+ int32_num_inputs_106->setName("num_inputs");
+ Value* ptr_aconsts_107 = args++;
+ ptr_aconsts_107->setName("aconsts");
+ Value* int32_num_consts_108 = args++;
+ int32_num_consts_108->setName("num_consts");
Value* ptr_samplers = args++;
ptr_samplers->setName("samplers");
- Value* ptr_sampler_units = args++;
- ptr_sampler_units->setName("sampler_units");
-
- BasicBlock* label_entry_126 = new BasicBlock("entry",func_run_fragment_shader,0);
- BasicBlock* label_forbody6_i_127 = new BasicBlock("forbody6.i",func_run_fragment_shader,0);
- BasicBlock* label_from_array_exit_128 = new BasicBlock("from_array.exit",func_run_fragment_shader,0);
- BasicBlock* label_forbody_i13 = new BasicBlock("forbody.i13",func_run_fragment_shader,0);
- BasicBlock* label_forbody_preheader_129 = new BasicBlock("forbody.preheader",func_run_fragment_shader,0);
- BasicBlock* label_forbody_130 = new BasicBlock("forbody",func_run_fragment_shader,0);
- BasicBlock* label_afterfor_131 = new BasicBlock("afterfor",func_run_fragment_shader,0);
- BasicBlock* label_forbody6_i_1 = new BasicBlock("forbody6.i.1",func_run_fragment_shader,0);
- BasicBlock* label_forbody6_i_2 = new BasicBlock("forbody6.i.2",func_run_fragment_shader,0);
- BasicBlock* label_forbody6_i_3 = new BasicBlock("forbody6.i.3",func_run_fragment_shader,0);
-
- // Block entry (label_entry_126)
- AllocaInst* ptr_inputs_132 = new AllocaInst(ArrayTy_31, "inputs", label_entry_126);
- AllocaInst* ptr_consts_133 = new AllocaInst(ArrayTy_23, "consts", label_entry_126);
- AllocaInst* ptr_results_134 = new AllocaInst(ArrayTy_31, "results", label_entry_126);
- AllocaInst* ptr_temps_135 = new AllocaInst(ArrayTy_25, "temps", label_entry_126);
- ICmpInst* int1_cmp5_i_136 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_inputs_123, const_int32_32, "cmp5.i", label_entry_126);
- new BranchInst(label_forbody6_i_127, label_from_array_exit_128, int1_cmp5_i_136, label_entry_126);
-
- // Block forbody6.i (label_forbody6_i_127)
- Argument* fwdref_139 = new Argument(IntegerType::get(32));
- PHINode* int32_j_0_reg2mem_0_i_138 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i", label_forbody6_i_127);
- int32_j_0_reg2mem_0_i_138->reserveOperandSpace(2);
- int32_j_0_reg2mem_0_i_138->addIncoming(const_int32_32, label_entry_126);
- int32_j_0_reg2mem_0_i_138->addIncoming(fwdref_139, label_forbody6_i_127);
-
- Argument* fwdref_141 = new Argument(VectorTy_13);
- PHINode* packed_vec_0_reg2mem_0_i_140 = new PHINode(VectorTy_13, "vec.0.reg2mem.0.i", label_forbody6_i_127);
- packed_vec_0_reg2mem_0_i_140->reserveOperandSpace(2);
- packed_vec_0_reg2mem_0_i_140->addIncoming(const_packed_33, label_entry_126);
- packed_vec_0_reg2mem_0_i_140->addIncoming(fwdref_141, label_forbody6_i_127);
-
- std::vector<Value*> ptr_arraydecay11_i_142_indices;
- ptr_arraydecay11_i_142_indices.push_back(const_int32_32);
- ptr_arraydecay11_i_142_indices.push_back(int32_j_0_reg2mem_0_i_138);
- ptr_arraydecay11_i_142_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay11_i_142 = new GetElementPtrInst(ptr_ainputs_122, ptr_arraydecay11_i_142_indices.begin(), ptr_arraydecay11_i_142_indices.end(), "arraydecay11.i", label_forbody6_i_127);
- LoadInst* float_tmp13_i_143 = new LoadInst(ptr_arraydecay11_i_142, "tmp13.i", false, label_forbody6_i_127);
- InsertElementInst* packed_tmp15_i_144 = new InsertElementInst(packed_vec_0_reg2mem_0_i_140, float_tmp13_i_143, const_int32_32, "tmp15.i", label_forbody6_i_127);
- std::vector<Value*> ptr_arrayidx23_i_145_indices;
- ptr_arrayidx23_i_145_indices.push_back(const_int32_32);
- ptr_arrayidx23_i_145_indices.push_back(int32_j_0_reg2mem_0_i_138);
- ptr_arrayidx23_i_145_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx23_i_145 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx23_i_145_indices.begin(), ptr_arrayidx23_i_145_indices.end(), "arrayidx23.i", label_forbody6_i_127);
- LoadInst* float_tmp24_i_146 = new LoadInst(ptr_arrayidx23_i_145, "tmp24.i", false, label_forbody6_i_127);
- InsertElementInst* packed_tmp26_i_147 = new InsertElementInst(packed_tmp15_i_144, float_tmp24_i_146, const_int32_34, "tmp26.i", label_forbody6_i_127);
- std::vector<Value*> ptr_arrayidx34_i_148_indices;
- ptr_arrayidx34_i_148_indices.push_back(const_int32_32);
- ptr_arrayidx34_i_148_indices.push_back(int32_j_0_reg2mem_0_i_138);
- ptr_arrayidx34_i_148_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx34_i_148 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx34_i_148_indices.begin(), ptr_arrayidx34_i_148_indices.end(), "arrayidx34.i", label_forbody6_i_127);
- LoadInst* float_tmp35_i_149 = new LoadInst(ptr_arrayidx34_i_148, "tmp35.i", false, label_forbody6_i_127);
- InsertElementInst* packed_tmp37_i_150 = new InsertElementInst(packed_tmp26_i_147, float_tmp35_i_149, const_int32_35, "tmp37.i", label_forbody6_i_127);
- std::vector<Value*> ptr_arrayidx45_i_151_indices;
- ptr_arrayidx45_i_151_indices.push_back(const_int32_32);
- ptr_arrayidx45_i_151_indices.push_back(int32_j_0_reg2mem_0_i_138);
- ptr_arrayidx45_i_151_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx45_i_151 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx45_i_151_indices.begin(), ptr_arrayidx45_i_151_indices.end(), "arrayidx45.i", label_forbody6_i_127);
- LoadInst* float_tmp46_i_152 = new LoadInst(ptr_arrayidx45_i_151, "tmp46.i", false, label_forbody6_i_127);
- InsertElementInst* packed_tmp48_i_153 = new InsertElementInst(packed_tmp37_i_150, float_tmp46_i_152, const_int32_36, "tmp48.i", label_forbody6_i_127);
- std::vector<Value*> ptr_arrayidx54_i_154_indices;
- ptr_arrayidx54_i_154_indices.push_back(const_int32_32);
- ptr_arrayidx54_i_154_indices.push_back(const_int32_32);
- ptr_arrayidx54_i_154_indices.push_back(int32_j_0_reg2mem_0_i_138);
- Instruction* ptr_arrayidx54_i_154 = new GetElementPtrInst(ptr_inputs_132, ptr_arrayidx54_i_154_indices.begin(), ptr_arrayidx54_i_154_indices.end(), "arrayidx54.i", label_forbody6_i_127);
- StoreInst* void_155 = new StoreInst(packed_tmp48_i_153, ptr_arrayidx54_i_154, false, label_forbody6_i_127);
- BinaryOperator* int32_inc_i_156 = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i_138, const_int32_34, "inc.i", label_forbody6_i_127);
- ICmpInst* int1_cmp59_i_157 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i_156, int32_num_inputs_123, "cmp59.i", label_forbody6_i_127);
- new BranchInst(label_forbody6_i_127, label_forbody6_i_1, int1_cmp59_i_157, label_forbody6_i_127);
-
- // Block from_array.exit (label_from_array_exit_128)
- ICmpInst* int1_cmp_i_159 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts_125, const_int32_32, "cmp.i", label_from_array_exit_128);
- new BranchInst(label_forbody_i13, label_forbody_preheader_129, int1_cmp_i_159, label_from_array_exit_128);
-
- // Block forbody.i13 (label_forbody_i13)
- Argument* fwdref_161 = new Argument(IntegerType::get(32));
- PHINode* int32_i_0_reg2mem_0_i3 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i3", label_forbody_i13);
- int32_i_0_reg2mem_0_i3->reserveOperandSpace(2);
- int32_i_0_reg2mem_0_i3->addIncoming(const_int32_32, label_from_array_exit_128);
- int32_i_0_reg2mem_0_i3->addIncoming(fwdref_161, label_forbody_i13);
-
- Argument* fwdref_162 = new Argument(VectorTy_13);
- PHINode* packed_vec_0_reg2mem_0_i4 = new PHINode(VectorTy_13, "vec.0.reg2mem.0.i4", label_forbody_i13);
- packed_vec_0_reg2mem_0_i4->reserveOperandSpace(2);
- packed_vec_0_reg2mem_0_i4->addIncoming(const_packed_33, label_from_array_exit_128);
- packed_vec_0_reg2mem_0_i4->addIncoming(fwdref_162, label_forbody_i13);
-
- std::vector<Value*> ptr_arraydecay_i5_indices;
- ptr_arraydecay_i5_indices.push_back(int32_i_0_reg2mem_0_i3);
- ptr_arraydecay_i5_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay_i5 = new GetElementPtrInst(ptr_aconsts_124, ptr_arraydecay_i5_indices.begin(), ptr_arraydecay_i5_indices.end(), "arraydecay.i5", label_forbody_i13);
- LoadInst* float_tmp5_i_163 = new LoadInst(ptr_arraydecay_i5, "tmp5.i", false, label_forbody_i13);
- InsertElementInst* packed_tmp7_i6 = new InsertElementInst(packed_vec_0_reg2mem_0_i4, float_tmp5_i_163, const_int32_32, "tmp7.i6", label_forbody_i13);
- std::vector<Value*> ptr_arrayidx12_i_164_indices;
- ptr_arrayidx12_i_164_indices.push_back(int32_i_0_reg2mem_0_i3);
- ptr_arrayidx12_i_164_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx12_i_164 = new GetElementPtrInst(ptr_aconsts_124, ptr_arrayidx12_i_164_indices.begin(), ptr_arrayidx12_i_164_indices.end(), "arrayidx12.i", label_forbody_i13);
- LoadInst* float_tmp13_i7 = new LoadInst(ptr_arrayidx12_i_164, "tmp13.i7", false, label_forbody_i13);
- InsertElementInst* packed_tmp15_i8 = new InsertElementInst(packed_tmp7_i6, float_tmp13_i7, const_int32_34, "tmp15.i8", label_forbody_i13);
- std::vector<Value*> ptr_arrayidx20_i_165_indices;
- ptr_arrayidx20_i_165_indices.push_back(int32_i_0_reg2mem_0_i3);
- ptr_arrayidx20_i_165_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx20_i_165 = new GetElementPtrInst(ptr_aconsts_124, ptr_arrayidx20_i_165_indices.begin(), ptr_arrayidx20_i_165_indices.end(), "arrayidx20.i", label_forbody_i13);
- LoadInst* float_tmp21_i_166 = new LoadInst(ptr_arrayidx20_i_165, "tmp21.i", false, label_forbody_i13);
- InsertElementInst* packed_tmp23_i9 = new InsertElementInst(packed_tmp15_i8, float_tmp21_i_166, const_int32_35, "tmp23.i9", label_forbody_i13);
- std::vector<Value*> ptr_arrayidx28_i_167_indices;
- ptr_arrayidx28_i_167_indices.push_back(int32_i_0_reg2mem_0_i3);
- ptr_arrayidx28_i_167_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx28_i_167 = new GetElementPtrInst(ptr_aconsts_124, ptr_arrayidx28_i_167_indices.begin(), ptr_arrayidx28_i_167_indices.end(), "arrayidx28.i", label_forbody_i13);
- LoadInst* float_tmp29_i_168 = new LoadInst(ptr_arrayidx28_i_167, "tmp29.i", false, label_forbody_i13);
- InsertElementInst* packed_tmp31_i_169 = new InsertElementInst(packed_tmp23_i9, float_tmp29_i_168, const_int32_36, "tmp31.i", label_forbody_i13);
- std::vector<Value*> ptr_arrayidx34_i10_indices;
- ptr_arrayidx34_i10_indices.push_back(const_int32_32);
- ptr_arrayidx34_i10_indices.push_back(int32_i_0_reg2mem_0_i3);
- Instruction* ptr_arrayidx34_i10 = new GetElementPtrInst(ptr_consts_133, ptr_arrayidx34_i10_indices.begin(), ptr_arrayidx34_i10_indices.end(), "arrayidx34.i10", label_forbody_i13);
- StoreInst* void_170 = new StoreInst(packed_tmp31_i_169, ptr_arrayidx34_i10, false, label_forbody_i13);
- BinaryOperator* int32_indvar_next22 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i3, const_int32_34, "indvar.next22", label_forbody_i13);
- ICmpInst* int1_exitcond23 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next22, int32_num_consts_125, "exitcond23", label_forbody_i13);
- new BranchInst(label_forbody_preheader_129, label_forbody_i13, int1_exitcond23, label_forbody_i13);
-
- // Block forbody.preheader (label_forbody_preheader_129)
- std::vector<Value*> ptr_arraydecay15_indices;
- ptr_arraydecay15_indices.push_back(const_int32_32);
- ptr_arraydecay15_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay15 = new GetElementPtrInst(ptr_consts_133, ptr_arraydecay15_indices.begin(), ptr_arraydecay15_indices.end(), "arraydecay15", label_forbody_preheader_129);
- std::vector<Value*> ptr_arraydecay16_indices;
- ptr_arraydecay16_indices.push_back(const_int32_32);
- ptr_arraydecay16_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay16 = new GetElementPtrInst(ptr_temps_135, ptr_arraydecay16_indices.begin(), ptr_arraydecay16_indices.end(), "arraydecay16", label_forbody_preheader_129);
- new BranchInst(label_forbody_130, label_forbody_preheader_129);
-
- // Block forbody (label_forbody_130)
- Argument* fwdref_174 = new Argument(IntegerType::get(32));
- PHINode* int32_i_0_reg2mem_0_173 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody_130);
- int32_i_0_reg2mem_0_173->reserveOperandSpace(2);
- int32_i_0_reg2mem_0_173->addIncoming(const_int32_32, label_forbody_preheader_129);
- int32_i_0_reg2mem_0_173->addIncoming(fwdref_174, label_forbody_130);
-
- std::vector<Value*> ptr_arraydecay8_indices;
- ptr_arraydecay8_indices.push_back(const_int32_32);
- ptr_arraydecay8_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arraydecay8_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay8 = new GetElementPtrInst(ptr_inputs_132, ptr_arraydecay8_indices.begin(), ptr_arraydecay8_indices.end(), "arraydecay8", label_forbody_130);
- std::vector<Value*> ptr_arraydecay12_indices;
- ptr_arraydecay12_indices.push_back(const_int32_32);
- ptr_arraydecay12_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arraydecay12_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay12 = new GetElementPtrInst(ptr_results_134, ptr_arraydecay12_indices.begin(), ptr_arraydecay12_indices.end(), "arraydecay12", label_forbody_130);
- std::vector<Value*> void_175_params;
- void_175_params.push_back(ptr_arraydecay12);
- void_175_params.push_back(ptr_arraydecay8);
- void_175_params.push_back(ptr_arraydecay15);
- void_175_params.push_back(ptr_arraydecay16);
- CallInst* void_175 = new CallInst(func_execute_shader, void_175_params.begin(), void_175_params.end(), "", label_forbody_130);
- void_175->setCallingConv(CallingConv::C);
- void_175->setTailCall(false);
- std::vector<Value*> ptr_arraydecay_i_indices;
- ptr_arraydecay_i_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arraydecay_i_indices.push_back(const_int32_32);
- ptr_arraydecay_i_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay_i = new GetElementPtrInst(ptr_dests_121, ptr_arraydecay_i_indices.begin(), ptr_arraydecay_i_indices.end(), "arraydecay.i", label_forbody_130);
- LoadInst* packed_tmp7_i = new LoadInst(ptr_arraydecay12, "tmp7.i", false, label_forbody_130);
- ExtractElementInst* float_tmp11_i = new ExtractElementInst(packed_tmp7_i, const_int32_32, "tmp11.i", label_forbody_130);
- StoreInst* void_176 = new StoreInst(float_tmp11_i, ptr_arraydecay_i, false, label_forbody_130);
- std::vector<Value*> ptr_arrayidx13_i_indices;
- ptr_arrayidx13_i_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arrayidx13_i_indices.push_back(const_int32_32);
- ptr_arrayidx13_i_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx13_i = new GetElementPtrInst(ptr_dests_121, ptr_arrayidx13_i_indices.begin(), ptr_arrayidx13_i_indices.end(), "arrayidx13.i", label_forbody_130);
- ExtractElementInst* float_tmp15_i2 = new ExtractElementInst(packed_tmp7_i, const_int32_34, "tmp15.i2", label_forbody_130);
- StoreInst* void_177 = new StoreInst(float_tmp15_i2, ptr_arrayidx13_i, false, label_forbody_130);
- std::vector<Value*> ptr_arrayidx17_i_indices;
- ptr_arrayidx17_i_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arrayidx17_i_indices.push_back(const_int32_32);
- ptr_arrayidx17_i_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx17_i = new GetElementPtrInst(ptr_dests_121, ptr_arrayidx17_i_indices.begin(), ptr_arrayidx17_i_indices.end(), "arrayidx17.i", label_forbody_130);
- ExtractElementInst* float_tmp19_i = new ExtractElementInst(packed_tmp7_i, const_int32_35, "tmp19.i", label_forbody_130);
- StoreInst* void_178 = new StoreInst(float_tmp19_i, ptr_arrayidx17_i, false, label_forbody_130);
- std::vector<Value*> ptr_arrayidx21_i_indices;
- ptr_arrayidx21_i_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arrayidx21_i_indices.push_back(const_int32_32);
- ptr_arrayidx21_i_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx21_i = new GetElementPtrInst(ptr_dests_121, ptr_arrayidx21_i_indices.begin(), ptr_arrayidx21_i_indices.end(), "arrayidx21.i", label_forbody_130);
- ExtractElementInst* float_tmp23_i = new ExtractElementInst(packed_tmp7_i, const_int32_36, "tmp23.i", label_forbody_130);
- StoreInst* void_179 = new StoreInst(float_tmp23_i, ptr_arrayidx21_i, false, label_forbody_130);
- std::vector<Value*> ptr_arraydecay_i_1_indices;
- ptr_arraydecay_i_1_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arraydecay_i_1_indices.push_back(const_int32_34);
- ptr_arraydecay_i_1_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay_i_1 = new GetElementPtrInst(ptr_dests_121, ptr_arraydecay_i_1_indices.begin(), ptr_arraydecay_i_1_indices.end(), "arraydecay.i.1", label_forbody_130);
- std::vector<Value*> ptr_arrayidx6_i_1_indices;
- ptr_arrayidx6_i_1_indices.push_back(const_int32_32);
- ptr_arrayidx6_i_1_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arrayidx6_i_1_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx6_i_1 = new GetElementPtrInst(ptr_results_134, ptr_arrayidx6_i_1_indices.begin(), ptr_arrayidx6_i_1_indices.end(), "arrayidx6.i.1", label_forbody_130);
- LoadInst* packed_tmp7_i_1 = new LoadInst(ptr_arrayidx6_i_1, "tmp7.i.1", false, label_forbody_130);
- ExtractElementInst* float_tmp11_i_1 = new ExtractElementInst(packed_tmp7_i_1, const_int32_32, "tmp11.i.1", label_forbody_130);
- StoreInst* void_180 = new StoreInst(float_tmp11_i_1, ptr_arraydecay_i_1, false, label_forbody_130);
- std::vector<Value*> ptr_arrayidx13_i_1_indices;
- ptr_arrayidx13_i_1_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arrayidx13_i_1_indices.push_back(const_int32_34);
- ptr_arrayidx13_i_1_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx13_i_1 = new GetElementPtrInst(ptr_dests_121, ptr_arrayidx13_i_1_indices.begin(), ptr_arrayidx13_i_1_indices.end(), "arrayidx13.i.1", label_forbody_130);
- ExtractElementInst* float_tmp15_i2_1 = new ExtractElementInst(packed_tmp7_i_1, const_int32_34, "tmp15.i2.1", label_forbody_130);
- StoreInst* void_181 = new StoreInst(float_tmp15_i2_1, ptr_arrayidx13_i_1, false, label_forbody_130);
- std::vector<Value*> ptr_arrayidx17_i_1_indices;
- ptr_arrayidx17_i_1_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arrayidx17_i_1_indices.push_back(const_int32_34);
- ptr_arrayidx17_i_1_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx17_i_1 = new GetElementPtrInst(ptr_dests_121, ptr_arrayidx17_i_1_indices.begin(), ptr_arrayidx17_i_1_indices.end(), "arrayidx17.i.1", label_forbody_130);
- ExtractElementInst* float_tmp19_i_1 = new ExtractElementInst(packed_tmp7_i_1, const_int32_35, "tmp19.i.1", label_forbody_130);
- StoreInst* void_182 = new StoreInst(float_tmp19_i_1, ptr_arrayidx17_i_1, false, label_forbody_130);
- std::vector<Value*> ptr_arrayidx21_i_1_indices;
- ptr_arrayidx21_i_1_indices.push_back(int32_i_0_reg2mem_0_173);
- ptr_arrayidx21_i_1_indices.push_back(const_int32_34);
- ptr_arrayidx21_i_1_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx21_i_1 = new GetElementPtrInst(ptr_dests_121, ptr_arrayidx21_i_1_indices.begin(), ptr_arrayidx21_i_1_indices.end(), "arrayidx21.i.1", label_forbody_130);
- ExtractElementInst* float_tmp23_i_1 = new ExtractElementInst(packed_tmp7_i_1, const_int32_36, "tmp23.i.1", label_forbody_130);
- StoreInst* void_183 = new StoreInst(float_tmp23_i_1, ptr_arrayidx21_i_1, false, label_forbody_130);
- BinaryOperator* int32_indvar_next20 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_173, const_int32_34, "indvar.next20", label_forbody_130);
- ICmpInst* int1_exitcond21 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next20, const_int32_37, "exitcond21", label_forbody_130);
- new BranchInst(label_afterfor_131, label_forbody_130, int1_exitcond21, label_forbody_130);
-
- // Block afterfor (label_afterfor_131)
- new ReturnInst(const_int32_38, label_afterfor_131);
-
- // Block forbody6.i.1 (label_forbody6_i_1)
- Argument* fwdref_186 = new Argument(IntegerType::get(32));
- PHINode* int32_j_0_reg2mem_0_i_1 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i.1", label_forbody6_i_1);
- int32_j_0_reg2mem_0_i_1->reserveOperandSpace(2);
- int32_j_0_reg2mem_0_i_1->addIncoming(const_int32_32, label_forbody6_i_127);
- int32_j_0_reg2mem_0_i_1->addIncoming(fwdref_186, label_forbody6_i_1);
-
- Argument* fwdref_187 = new Argument(VectorTy_13);
- PHINode* packed_vec_0_reg2mem_0_i_1 = new PHINode(VectorTy_13, "vec.0.reg2mem.0.i.1", label_forbody6_i_1);
- packed_vec_0_reg2mem_0_i_1->reserveOperandSpace(2);
- packed_vec_0_reg2mem_0_i_1->addIncoming(packed_tmp48_i_153, label_forbody6_i_127);
- packed_vec_0_reg2mem_0_i_1->addIncoming(fwdref_187, label_forbody6_i_1);
-
- std::vector<Value*> ptr_arraydecay11_i_1_indices;
- ptr_arraydecay11_i_1_indices.push_back(const_int32_34);
- ptr_arraydecay11_i_1_indices.push_back(int32_j_0_reg2mem_0_i_1);
- ptr_arraydecay11_i_1_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay11_i_1 = new GetElementPtrInst(ptr_ainputs_122, ptr_arraydecay11_i_1_indices.begin(), ptr_arraydecay11_i_1_indices.end(), "arraydecay11.i.1", label_forbody6_i_1);
- LoadInst* float_tmp13_i_1 = new LoadInst(ptr_arraydecay11_i_1, "tmp13.i.1", false, label_forbody6_i_1);
- InsertElementInst* packed_tmp15_i_1 = new InsertElementInst(packed_vec_0_reg2mem_0_i_1, float_tmp13_i_1, const_int32_32, "tmp15.i.1", label_forbody6_i_1);
- std::vector<Value*> ptr_arrayidx23_i_1_indices;
- ptr_arrayidx23_i_1_indices.push_back(const_int32_34);
- ptr_arrayidx23_i_1_indices.push_back(int32_j_0_reg2mem_0_i_1);
- ptr_arrayidx23_i_1_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx23_i_1 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx23_i_1_indices.begin(), ptr_arrayidx23_i_1_indices.end(), "arrayidx23.i.1", label_forbody6_i_1);
- LoadInst* float_tmp24_i_1 = new LoadInst(ptr_arrayidx23_i_1, "tmp24.i.1", false, label_forbody6_i_1);
- InsertElementInst* packed_tmp26_i_1 = new InsertElementInst(packed_tmp15_i_1, float_tmp24_i_1, const_int32_34, "tmp26.i.1", label_forbody6_i_1);
- std::vector<Value*> ptr_arrayidx34_i_1_indices;
- ptr_arrayidx34_i_1_indices.push_back(const_int32_34);
- ptr_arrayidx34_i_1_indices.push_back(int32_j_0_reg2mem_0_i_1);
- ptr_arrayidx34_i_1_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx34_i_1 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx34_i_1_indices.begin(), ptr_arrayidx34_i_1_indices.end(), "arrayidx34.i.1", label_forbody6_i_1);
- LoadInst* float_tmp35_i_1 = new LoadInst(ptr_arrayidx34_i_1, "tmp35.i.1", false, label_forbody6_i_1);
- InsertElementInst* packed_tmp37_i_1 = new InsertElementInst(packed_tmp26_i_1, float_tmp35_i_1, const_int32_35, "tmp37.i.1", label_forbody6_i_1);
- std::vector<Value*> ptr_arrayidx45_i_1_indices;
- ptr_arrayidx45_i_1_indices.push_back(const_int32_34);
- ptr_arrayidx45_i_1_indices.push_back(int32_j_0_reg2mem_0_i_1);
- ptr_arrayidx45_i_1_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx45_i_1 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx45_i_1_indices.begin(), ptr_arrayidx45_i_1_indices.end(), "arrayidx45.i.1", label_forbody6_i_1);
- LoadInst* float_tmp46_i_1 = new LoadInst(ptr_arrayidx45_i_1, "tmp46.i.1", false, label_forbody6_i_1);
- InsertElementInst* packed_tmp48_i_1 = new InsertElementInst(packed_tmp37_i_1, float_tmp46_i_1, const_int32_36, "tmp48.i.1", label_forbody6_i_1);
- std::vector<Value*> ptr_arrayidx54_i_1_indices;
- ptr_arrayidx54_i_1_indices.push_back(const_int32_32);
- ptr_arrayidx54_i_1_indices.push_back(const_int32_34);
- ptr_arrayidx54_i_1_indices.push_back(int32_j_0_reg2mem_0_i_1);
- Instruction* ptr_arrayidx54_i_1 = new GetElementPtrInst(ptr_inputs_132, ptr_arrayidx54_i_1_indices.begin(), ptr_arrayidx54_i_1_indices.end(), "arrayidx54.i.1", label_forbody6_i_1);
- StoreInst* void_188 = new StoreInst(packed_tmp48_i_1, ptr_arrayidx54_i_1, false, label_forbody6_i_1);
- BinaryOperator* int32_inc_i_1 = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i_1, const_int32_34, "inc.i.1", label_forbody6_i_1);
- ICmpInst* int1_cmp59_i_1 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i_1, int32_num_inputs_123, "cmp59.i.1", label_forbody6_i_1);
- new BranchInst(label_forbody6_i_1, label_forbody6_i_2, int1_cmp59_i_1, label_forbody6_i_1);
-
- // Block forbody6.i.2 (label_forbody6_i_2)
- Argument* fwdref_190 = new Argument(IntegerType::get(32));
- PHINode* int32_j_0_reg2mem_0_i_2 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i.2", label_forbody6_i_2);
- int32_j_0_reg2mem_0_i_2->reserveOperandSpace(2);
- int32_j_0_reg2mem_0_i_2->addIncoming(const_int32_32, label_forbody6_i_1);
- int32_j_0_reg2mem_0_i_2->addIncoming(fwdref_190, label_forbody6_i_2);
-
- Argument* fwdref_191 = new Argument(VectorTy_13);
- PHINode* packed_vec_0_reg2mem_0_i_2 = new PHINode(VectorTy_13, "vec.0.reg2mem.0.i.2", label_forbody6_i_2);
- packed_vec_0_reg2mem_0_i_2->reserveOperandSpace(2);
- packed_vec_0_reg2mem_0_i_2->addIncoming(packed_tmp48_i_1, label_forbody6_i_1);
- packed_vec_0_reg2mem_0_i_2->addIncoming(fwdref_191, label_forbody6_i_2);
-
- std::vector<Value*> ptr_arraydecay11_i_2_indices;
- ptr_arraydecay11_i_2_indices.push_back(const_int32_35);
- ptr_arraydecay11_i_2_indices.push_back(int32_j_0_reg2mem_0_i_2);
- ptr_arraydecay11_i_2_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay11_i_2 = new GetElementPtrInst(ptr_ainputs_122, ptr_arraydecay11_i_2_indices.begin(), ptr_arraydecay11_i_2_indices.end(), "arraydecay11.i.2", label_forbody6_i_2);
- LoadInst* float_tmp13_i_2 = new LoadInst(ptr_arraydecay11_i_2, "tmp13.i.2", false, label_forbody6_i_2);
- InsertElementInst* packed_tmp15_i_2 = new InsertElementInst(packed_vec_0_reg2mem_0_i_2, float_tmp13_i_2, const_int32_32, "tmp15.i.2", label_forbody6_i_2);
- std::vector<Value*> ptr_arrayidx23_i_2_indices;
- ptr_arrayidx23_i_2_indices.push_back(const_int32_35);
- ptr_arrayidx23_i_2_indices.push_back(int32_j_0_reg2mem_0_i_2);
- ptr_arrayidx23_i_2_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx23_i_2 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx23_i_2_indices.begin(), ptr_arrayidx23_i_2_indices.end(), "arrayidx23.i.2", label_forbody6_i_2);
- LoadInst* float_tmp24_i_2 = new LoadInst(ptr_arrayidx23_i_2, "tmp24.i.2", false, label_forbody6_i_2);
- InsertElementInst* packed_tmp26_i_2 = new InsertElementInst(packed_tmp15_i_2, float_tmp24_i_2, const_int32_34, "tmp26.i.2", label_forbody6_i_2);
- std::vector<Value*> ptr_arrayidx34_i_2_indices;
- ptr_arrayidx34_i_2_indices.push_back(const_int32_35);
- ptr_arrayidx34_i_2_indices.push_back(int32_j_0_reg2mem_0_i_2);
- ptr_arrayidx34_i_2_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx34_i_2 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx34_i_2_indices.begin(), ptr_arrayidx34_i_2_indices.end(), "arrayidx34.i.2", label_forbody6_i_2);
- LoadInst* float_tmp35_i_2 = new LoadInst(ptr_arrayidx34_i_2, "tmp35.i.2", false, label_forbody6_i_2);
- InsertElementInst* packed_tmp37_i_2 = new InsertElementInst(packed_tmp26_i_2, float_tmp35_i_2, const_int32_35, "tmp37.i.2", label_forbody6_i_2);
- std::vector<Value*> ptr_arrayidx45_i_2_indices;
- ptr_arrayidx45_i_2_indices.push_back(const_int32_35);
- ptr_arrayidx45_i_2_indices.push_back(int32_j_0_reg2mem_0_i_2);
- ptr_arrayidx45_i_2_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx45_i_2 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx45_i_2_indices.begin(), ptr_arrayidx45_i_2_indices.end(), "arrayidx45.i.2", label_forbody6_i_2);
- LoadInst* float_tmp46_i_2 = new LoadInst(ptr_arrayidx45_i_2, "tmp46.i.2", false, label_forbody6_i_2);
- InsertElementInst* packed_tmp48_i_2 = new InsertElementInst(packed_tmp37_i_2, float_tmp46_i_2, const_int32_36, "tmp48.i.2", label_forbody6_i_2);
- std::vector<Value*> ptr_arrayidx54_i_2_indices;
- ptr_arrayidx54_i_2_indices.push_back(const_int32_32);
- ptr_arrayidx54_i_2_indices.push_back(const_int32_35);
- ptr_arrayidx54_i_2_indices.push_back(int32_j_0_reg2mem_0_i_2);
- Instruction* ptr_arrayidx54_i_2 = new GetElementPtrInst(ptr_inputs_132, ptr_arrayidx54_i_2_indices.begin(), ptr_arrayidx54_i_2_indices.end(), "arrayidx54.i.2", label_forbody6_i_2);
- StoreInst* void_192 = new StoreInst(packed_tmp48_i_2, ptr_arrayidx54_i_2, false, label_forbody6_i_2);
- BinaryOperator* int32_inc_i_2 = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i_2, const_int32_34, "inc.i.2", label_forbody6_i_2);
- ICmpInst* int1_cmp59_i_2 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i_2, int32_num_inputs_123, "cmp59.i.2", label_forbody6_i_2);
- new BranchInst(label_forbody6_i_2, label_forbody6_i_3, int1_cmp59_i_2, label_forbody6_i_2);
-
- // Block forbody6.i.3 (label_forbody6_i_3)
- Argument* fwdref_194 = new Argument(IntegerType::get(32));
- PHINode* int32_j_0_reg2mem_0_i_3 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i.3", label_forbody6_i_3);
- int32_j_0_reg2mem_0_i_3->reserveOperandSpace(2);
- int32_j_0_reg2mem_0_i_3->addIncoming(const_int32_32, label_forbody6_i_2);
- int32_j_0_reg2mem_0_i_3->addIncoming(fwdref_194, label_forbody6_i_3);
-
- Argument* fwdref_195 = new Argument(VectorTy_13);
- PHINode* packed_vec_0_reg2mem_0_i_3 = new PHINode(VectorTy_13, "vec.0.reg2mem.0.i.3", label_forbody6_i_3);
- packed_vec_0_reg2mem_0_i_3->reserveOperandSpace(2);
- packed_vec_0_reg2mem_0_i_3->addIncoming(packed_tmp48_i_2, label_forbody6_i_2);
- packed_vec_0_reg2mem_0_i_3->addIncoming(fwdref_195, label_forbody6_i_3);
- std::vector<Value*> ptr_arraydecay11_i_3_indices;
- ptr_arraydecay11_i_3_indices.push_back(const_int32_36);
- ptr_arraydecay11_i_3_indices.push_back(int32_j_0_reg2mem_0_i_3);
- ptr_arraydecay11_i_3_indices.push_back(const_int32_32);
- Instruction* ptr_arraydecay11_i_3 = new GetElementPtrInst(ptr_ainputs_122, ptr_arraydecay11_i_3_indices.begin(), ptr_arraydecay11_i_3_indices.end(), "arraydecay11.i.3", label_forbody6_i_3);
- LoadInst* float_tmp13_i_3 = new LoadInst(ptr_arraydecay11_i_3, "tmp13.i.3", false, label_forbody6_i_3);
- InsertElementInst* packed_tmp15_i_3 = new InsertElementInst(packed_vec_0_reg2mem_0_i_3, float_tmp13_i_3, const_int32_32, "tmp15.i.3", label_forbody6_i_3);
- std::vector<Value*> ptr_arrayidx23_i_3_indices;
- ptr_arrayidx23_i_3_indices.push_back(const_int32_36);
- ptr_arrayidx23_i_3_indices.push_back(int32_j_0_reg2mem_0_i_3);
- ptr_arrayidx23_i_3_indices.push_back(const_int32_34);
- Instruction* ptr_arrayidx23_i_3 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx23_i_3_indices.begin(), ptr_arrayidx23_i_3_indices.end(), "arrayidx23.i.3", label_forbody6_i_3);
- LoadInst* float_tmp24_i_3 = new LoadInst(ptr_arrayidx23_i_3, "tmp24.i.3", false, label_forbody6_i_3);
- InsertElementInst* packed_tmp26_i_3 = new InsertElementInst(packed_tmp15_i_3, float_tmp24_i_3, const_int32_34, "tmp26.i.3", label_forbody6_i_3);
- std::vector<Value*> ptr_arrayidx34_i_3_indices;
- ptr_arrayidx34_i_3_indices.push_back(const_int32_36);
- ptr_arrayidx34_i_3_indices.push_back(int32_j_0_reg2mem_0_i_3);
- ptr_arrayidx34_i_3_indices.push_back(const_int32_35);
- Instruction* ptr_arrayidx34_i_3 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx34_i_3_indices.begin(), ptr_arrayidx34_i_3_indices.end(), "arrayidx34.i.3", label_forbody6_i_3);
- LoadInst* float_tmp35_i_3 = new LoadInst(ptr_arrayidx34_i_3, "tmp35.i.3", false, label_forbody6_i_3);
- InsertElementInst* packed_tmp37_i_3 = new InsertElementInst(packed_tmp26_i_3, float_tmp35_i_3, const_int32_35, "tmp37.i.3", label_forbody6_i_3);
- std::vector<Value*> ptr_arrayidx45_i_3_indices;
- ptr_arrayidx45_i_3_indices.push_back(const_int32_36);
- ptr_arrayidx45_i_3_indices.push_back(int32_j_0_reg2mem_0_i_3);
- ptr_arrayidx45_i_3_indices.push_back(const_int32_36);
- Instruction* ptr_arrayidx45_i_3 = new GetElementPtrInst(ptr_ainputs_122, ptr_arrayidx45_i_3_indices.begin(), ptr_arrayidx45_i_3_indices.end(), "arrayidx45.i.3", label_forbody6_i_3);
- LoadInst* float_tmp46_i_3 = new LoadInst(ptr_arrayidx45_i_3, "tmp46.i.3", false, label_forbody6_i_3);
- InsertElementInst* packed_tmp48_i_3 = new InsertElementInst(packed_tmp37_i_3, float_tmp46_i_3, const_int32_36, "tmp48.i.3", label_forbody6_i_3);
- std::vector<Value*> ptr_arrayidx54_i_3_indices;
- ptr_arrayidx54_i_3_indices.push_back(const_int32_32);
- ptr_arrayidx54_i_3_indices.push_back(const_int32_36);
- ptr_arrayidx54_i_3_indices.push_back(int32_j_0_reg2mem_0_i_3);
- Instruction* ptr_arrayidx54_i_3 = new GetElementPtrInst(ptr_inputs_132, ptr_arrayidx54_i_3_indices.begin(), ptr_arrayidx54_i_3_indices.end(), "arrayidx54.i.3", label_forbody6_i_3);
- StoreInst* void_196 = new StoreInst(packed_tmp48_i_3, ptr_arrayidx54_i_3, false, label_forbody6_i_3);
- BinaryOperator* int32_inc_i_3 = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i_3, const_int32_34, "inc.i.3", label_forbody6_i_3);
- ICmpInst* int1_cmp59_i_3 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i_3, int32_num_inputs_123, "cmp59.i.3", label_forbody6_i_3);
- new BranchInst(label_forbody6_i_3, label_from_array_exit_128, int1_cmp59_i_3, label_forbody6_i_3);
+ BasicBlock* label_entry_109 = new BasicBlock("entry",func_run_fragment_shader,0);
+ BasicBlock* label_forbody_i_110 = new BasicBlock("forbody.i",func_run_fragment_shader,0);
+ BasicBlock* label_from_consts_exit_111 = new BasicBlock("from_consts.exit",func_run_fragment_shader,0);
+
+ // Block entry (label_entry_109)
+ AllocaInst* ptr_consts_112 = new AllocaInst(ArrayTy_21, "consts", label_entry_109);
+ AllocaInst* ptr_temps_113 = new AllocaInst(ArrayTy_23, "temps", label_entry_109);
+ AllocaInst* ptr_args_114 = new AllocaInst(StructTy_struct_ShaderInput, "args", label_entry_109);
+ std::vector<Value*> ptr_tmp_indices;
+ ptr_tmp_indices.push_back(const_int32_30);
+ ptr_tmp_indices.push_back(const_int32_35);
+ Instruction* ptr_tmp = new GetElementPtrInst(ptr_args_114, ptr_tmp_indices.begin(), ptr_tmp_indices.end(), "tmp", label_entry_109);
+ StoreInst* void_115 = new StoreInst(const_int32_30, ptr_tmp, false, label_entry_109);
+ ICmpInst* int1_cmp_i_116 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts_108, const_int32_30, "cmp.i", label_entry_109);
+ new BranchInst(label_forbody_i_110, label_from_consts_exit_111, int1_cmp_i_116, label_entry_109);
+
+ // Block forbody.i (label_forbody_i_110)
+ Argument* fwdref_119 = new Argument(IntegerType::get(32));
+ PHINode* int32_i_0_reg2mem_0_i_118 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i", label_forbody_i_110);
+ int32_i_0_reg2mem_0_i_118->reserveOperandSpace(2);
+ int32_i_0_reg2mem_0_i_118->addIncoming(const_int32_30, label_entry_109);
+ int32_i_0_reg2mem_0_i_118->addIncoming(fwdref_119, label_forbody_i_110);
+
+ Argument* fwdref_121 = new Argument(VectorTy_1);
+ PHINode* packed_vec_0_reg2mem_0_i_120 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody_i_110);
+ packed_vec_0_reg2mem_0_i_120->reserveOperandSpace(2);
+ packed_vec_0_reg2mem_0_i_120->addIncoming(const_packed_31, label_entry_109);
+ packed_vec_0_reg2mem_0_i_120->addIncoming(fwdref_121, label_forbody_i_110);
+
+ std::vector<Value*> ptr_arraydecay_i_122_indices;
+ ptr_arraydecay_i_122_indices.push_back(int32_i_0_reg2mem_0_i_118);
+ ptr_arraydecay_i_122_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay_i_122 = new GetElementPtrInst(ptr_aconsts_107, ptr_arraydecay_i_122_indices.begin(), ptr_arraydecay_i_122_indices.end(), "arraydecay.i", label_forbody_i_110);
+ LoadInst* float_tmp5_i_123 = new LoadInst(ptr_arraydecay_i_122, "tmp5.i", false, label_forbody_i_110);
+ InsertElementInst* packed_tmp7_i_124 = new InsertElementInst(packed_vec_0_reg2mem_0_i_120, float_tmp5_i_123, const_int32_30, "tmp7.i", label_forbody_i_110);
+ std::vector<Value*> ptr_arrayidx12_i_125_indices;
+ ptr_arrayidx12_i_125_indices.push_back(int32_i_0_reg2mem_0_i_118);
+ ptr_arrayidx12_i_125_indices.push_back(const_int32_32);
+ Instruction* ptr_arrayidx12_i_125 = new GetElementPtrInst(ptr_aconsts_107, ptr_arrayidx12_i_125_indices.begin(), ptr_arrayidx12_i_125_indices.end(), "arrayidx12.i", label_forbody_i_110);
+ LoadInst* float_tmp13_i_126 = new LoadInst(ptr_arrayidx12_i_125, "tmp13.i", false, label_forbody_i_110);
+ InsertElementInst* packed_tmp15_i_127 = new InsertElementInst(packed_tmp7_i_124, float_tmp13_i_126, const_int32_32, "tmp15.i", label_forbody_i_110);
+ std::vector<Value*> ptr_arrayidx20_i_128_indices;
+ ptr_arrayidx20_i_128_indices.push_back(int32_i_0_reg2mem_0_i_118);
+ ptr_arrayidx20_i_128_indices.push_back(const_int32_33);
+ Instruction* ptr_arrayidx20_i_128 = new GetElementPtrInst(ptr_aconsts_107, ptr_arrayidx20_i_128_indices.begin(), ptr_arrayidx20_i_128_indices.end(), "arrayidx20.i", label_forbody_i_110);
+ LoadInst* float_tmp21_i_129 = new LoadInst(ptr_arrayidx20_i_128, "tmp21.i", false, label_forbody_i_110);
+ InsertElementInst* packed_tmp23_i_130 = new InsertElementInst(packed_tmp15_i_127, float_tmp21_i_129, const_int32_33, "tmp23.i", label_forbody_i_110);
+ std::vector<Value*> ptr_arrayidx28_i_131_indices;
+ ptr_arrayidx28_i_131_indices.push_back(int32_i_0_reg2mem_0_i_118);
+ ptr_arrayidx28_i_131_indices.push_back(const_int32_34);
+ Instruction* ptr_arrayidx28_i_131 = new GetElementPtrInst(ptr_aconsts_107, ptr_arrayidx28_i_131_indices.begin(), ptr_arrayidx28_i_131_indices.end(), "arrayidx28.i", label_forbody_i_110);
+ LoadInst* float_tmp29_i_132 = new LoadInst(ptr_arrayidx28_i_131, "tmp29.i", false, label_forbody_i_110);
+ InsertElementInst* packed_tmp31_i_133 = new InsertElementInst(packed_tmp23_i_130, float_tmp29_i_132, const_int32_34, "tmp31.i", label_forbody_i_110);
+ std::vector<Value*> ptr_arrayidx34_i_134_indices;
+ ptr_arrayidx34_i_134_indices.push_back(const_int32_30);
+ ptr_arrayidx34_i_134_indices.push_back(int32_i_0_reg2mem_0_i_118);
+ Instruction* ptr_arrayidx34_i_134 = new GetElementPtrInst(ptr_consts_112, ptr_arrayidx34_i_134_indices.begin(), ptr_arrayidx34_i_134_indices.end(), "arrayidx34.i", label_forbody_i_110);
+ StoreInst* void_135 = new StoreInst(packed_tmp31_i_133, ptr_arrayidx34_i_134, false, label_forbody_i_110);
+ BinaryOperator* int32_indvar_next7 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i_118, const_int32_32, "indvar.next7", label_forbody_i_110);
+ ICmpInst* int1_exitcond8 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next7, int32_num_consts_108, "exitcond8", label_forbody_i_110);
+ new BranchInst(label_from_consts_exit_111, label_forbody_i_110, int1_exitcond8, label_forbody_i_110);
+
+ // Block from_consts.exit (label_from_consts_exit_111)
+ std::vector<Value*> ptr_tmp3_indices;
+ ptr_tmp3_indices.push_back(const_int32_30);
+ ptr_tmp3_indices.push_back(const_int32_34);
+ Instruction* ptr_tmp3 = new GetElementPtrInst(ptr_args_114, ptr_tmp3_indices.begin(), ptr_tmp3_indices.end(), "tmp3", label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay4_indices;
+ ptr_arraydecay4_indices.push_back(const_int32_30);
+ ptr_arraydecay4_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay4 = new GetElementPtrInst(ptr_consts_112, ptr_arraydecay4_indices.begin(), ptr_arraydecay4_indices.end(), "arraydecay4", label_from_consts_exit_111);
+ StoreInst* void_137 = new StoreInst(ptr_arraydecay4, ptr_tmp3, false, label_from_consts_exit_111);
+ std::vector<Value*> ptr_tmp5_indices;
+ ptr_tmp5_indices.push_back(const_int32_30);
+ ptr_tmp5_indices.push_back(const_int32_33);
+ Instruction* ptr_tmp5 = new GetElementPtrInst(ptr_args_114, ptr_tmp5_indices.begin(), ptr_tmp5_indices.end(), "tmp5", label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay6_indices;
+ ptr_arraydecay6_indices.push_back(const_int32_30);
+ ptr_arraydecay6_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay6 = new GetElementPtrInst(ptr_temps_113, ptr_arraydecay6_indices.begin(), ptr_arraydecay6_indices.end(), "arraydecay6", label_from_consts_exit_111);
+ StoreInst* void_138 = new StoreInst(ptr_arraydecay6, ptr_tmp5, false, label_from_consts_exit_111);
+ std::vector<Value*> ptr_tmp8_139_indices;
+ ptr_tmp8_139_indices.push_back(const_int32_30);
+ ptr_tmp8_139_indices.push_back(const_int32_32);
+ Instruction* ptr_tmp8_139 = new GetElementPtrInst(ptr_args_114, ptr_tmp8_139_indices.begin(), ptr_tmp8_139_indices.end(), "tmp8", label_from_consts_exit_111);
+ std::vector<Value*> ptr_tmp12_140_indices;
+ ptr_tmp12_140_indices.push_back(const_int32_30);
+ ptr_tmp12_140_indices.push_back(const_int32_30);
+ Instruction* ptr_tmp12_140 = new GetElementPtrInst(ptr_args_114, ptr_tmp12_140_indices.begin(), ptr_tmp12_140_indices.end(), "tmp12", label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay11_141_indices;
+ ptr_arraydecay11_141_indices.push_back(const_int32_30);
+ ptr_arraydecay11_141_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay11_141 = new GetElementPtrInst(ptr_inputs_105, ptr_arraydecay11_141_indices.begin(), ptr_arraydecay11_141_indices.end(), "arraydecay11", label_from_consts_exit_111);
+ StoreInst* void_142 = new StoreInst(ptr_arraydecay11_141, ptr_tmp8_139, false, label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay16_143_indices;
+ ptr_arraydecay16_143_indices.push_back(const_int32_30);
+ ptr_arraydecay16_143_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay16_143 = new GetElementPtrInst(ptr_results_104, ptr_arraydecay16_143_indices.begin(), ptr_arraydecay16_143_indices.end(), "arraydecay16", label_from_consts_exit_111);
+ StoreInst* void_144 = new StoreInst(ptr_arraydecay16_143, ptr_tmp12_140, false, label_from_consts_exit_111);
+ StoreInst* void_145 = new StoreInst(const_int32_30, ptr_tmp, false, label_from_consts_exit_111);
+ CallInst* void_146 = new CallInst(func_execute_shader, ptr_args_114, "", label_from_consts_exit_111);
+ void_146->setCallingConv(CallingConv::C);
+ void_146->setTailCall(false);
+ LoadInst* int32_tmp23 = new LoadInst(ptr_tmp, "tmp23", false, label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay11_1_indices;
+ ptr_arraydecay11_1_indices.push_back(const_int32_32);
+ ptr_arraydecay11_1_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay11_1 = new GetElementPtrInst(ptr_inputs_105, ptr_arraydecay11_1_indices.begin(), ptr_arraydecay11_1_indices.end(), "arraydecay11.1", label_from_consts_exit_111);
+ StoreInst* void_147 = new StoreInst(ptr_arraydecay11_1, ptr_tmp8_139, false, label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay16_1_indices;
+ ptr_arraydecay16_1_indices.push_back(const_int32_32);
+ ptr_arraydecay16_1_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay16_1 = new GetElementPtrInst(ptr_results_104, ptr_arraydecay16_1_indices.begin(), ptr_arraydecay16_1_indices.end(), "arraydecay16.1", label_from_consts_exit_111);
+ StoreInst* void_148 = new StoreInst(ptr_arraydecay16_1, ptr_tmp12_140, false, label_from_consts_exit_111);
+ StoreInst* void_149 = new StoreInst(const_int32_30, ptr_tmp, false, label_from_consts_exit_111);
+ CallInst* void_150 = new CallInst(func_execute_shader, ptr_args_114, "", label_from_consts_exit_111);
+ void_150->setCallingConv(CallingConv::C);
+ void_150->setTailCall(false);
+ LoadInst* int32_tmp23_1 = new LoadInst(ptr_tmp, "tmp23.1", false, label_from_consts_exit_111);
+ BinaryOperator* int32_shl_1 = BinaryOperator::create(Instruction::Shl, int32_tmp23_1, const_int32_32, "shl.1", label_from_consts_exit_111);
+ BinaryOperator* int32_or_1 = BinaryOperator::create(Instruction::Or, int32_shl_1, int32_tmp23, "or.1", label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay11_2_indices;
+ ptr_arraydecay11_2_indices.push_back(const_int32_33);
+ ptr_arraydecay11_2_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay11_2 = new GetElementPtrInst(ptr_inputs_105, ptr_arraydecay11_2_indices.begin(), ptr_arraydecay11_2_indices.end(), "arraydecay11.2", label_from_consts_exit_111);
+ StoreInst* void_151 = new StoreInst(ptr_arraydecay11_2, ptr_tmp8_139, false, label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay16_2_indices;
+ ptr_arraydecay16_2_indices.push_back(const_int32_33);
+ ptr_arraydecay16_2_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay16_2 = new GetElementPtrInst(ptr_results_104, ptr_arraydecay16_2_indices.begin(), ptr_arraydecay16_2_indices.end(), "arraydecay16.2", label_from_consts_exit_111);
+ StoreInst* void_152 = new StoreInst(ptr_arraydecay16_2, ptr_tmp12_140, false, label_from_consts_exit_111);
+ StoreInst* void_153 = new StoreInst(const_int32_30, ptr_tmp, false, label_from_consts_exit_111);
+ CallInst* void_154 = new CallInst(func_execute_shader, ptr_args_114, "", label_from_consts_exit_111);
+ void_154->setCallingConv(CallingConv::C);
+ void_154->setTailCall(false);
+ LoadInst* int32_tmp23_2 = new LoadInst(ptr_tmp, "tmp23.2", false, label_from_consts_exit_111);
+ BinaryOperator* int32_shl_2 = BinaryOperator::create(Instruction::Shl, int32_tmp23_2, const_int32_33, "shl.2", label_from_consts_exit_111);
+ BinaryOperator* int32_or_2 = BinaryOperator::create(Instruction::Or, int32_shl_2, int32_or_1, "or.2", label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay11_3_indices;
+ ptr_arraydecay11_3_indices.push_back(const_int32_34);
+ ptr_arraydecay11_3_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay11_3 = new GetElementPtrInst(ptr_inputs_105, ptr_arraydecay11_3_indices.begin(), ptr_arraydecay11_3_indices.end(), "arraydecay11.3", label_from_consts_exit_111);
+ StoreInst* void_155 = new StoreInst(ptr_arraydecay11_3, ptr_tmp8_139, false, label_from_consts_exit_111);
+ std::vector<Value*> ptr_arraydecay16_3_indices;
+ ptr_arraydecay16_3_indices.push_back(const_int32_34);
+ ptr_arraydecay16_3_indices.push_back(const_int32_30);
+ Instruction* ptr_arraydecay16_3 = new GetElementPtrInst(ptr_results_104, ptr_arraydecay16_3_indices.begin(), ptr_arraydecay16_3_indices.end(), "arraydecay16.3", label_from_consts_exit_111);
+ StoreInst* void_156 = new StoreInst(ptr_arraydecay16_3, ptr_tmp12_140, false, label_from_consts_exit_111);
+ StoreInst* void_157 = new StoreInst(const_int32_30, ptr_tmp, false, label_from_consts_exit_111);
+ CallInst* void_158 = new CallInst(func_execute_shader, ptr_args_114, "", label_from_consts_exit_111);
+ void_158->setCallingConv(CallingConv::C);
+ void_158->setTailCall(false);
+ LoadInst* int32_tmp23_3 = new LoadInst(ptr_tmp, "tmp23.3", false, label_from_consts_exit_111);
+ BinaryOperator* int32_shl_3 = BinaryOperator::create(Instruction::Shl, int32_tmp23_3, const_int32_34, "shl.3", label_from_consts_exit_111);
+ BinaryOperator* int32_or_3 = BinaryOperator::create(Instruction::Or, int32_shl_3, int32_or_2, "or.3", label_from_consts_exit_111);
+ BinaryOperator* int32_neg = BinaryOperator::create(Instruction::Xor, int32_or_3, const_int32_36, "neg", label_from_consts_exit_111);
+ new ReturnInst(int32_neg, label_from_consts_exit_111);
// Resolve Forward References
- fwdref_195->replaceAllUsesWith(packed_tmp48_i_3); delete fwdref_195;
- fwdref_194->replaceAllUsesWith(int32_inc_i_3); delete fwdref_194;
- fwdref_141->replaceAllUsesWith(packed_tmp48_i_153); delete fwdref_141;
- fwdref_139->replaceAllUsesWith(int32_inc_i_156); delete fwdref_139;
- fwdref_162->replaceAllUsesWith(packed_tmp31_i_169); delete fwdref_162;
- fwdref_161->replaceAllUsesWith(int32_indvar_next22); delete fwdref_161;
- fwdref_174->replaceAllUsesWith(int32_indvar_next20); delete fwdref_174;
- fwdref_187->replaceAllUsesWith(packed_tmp48_i_1); delete fwdref_187;
- fwdref_186->replaceAllUsesWith(int32_inc_i_1); delete fwdref_186;
- fwdref_191->replaceAllUsesWith(packed_tmp48_i_2); delete fwdref_191;
- fwdref_190->replaceAllUsesWith(int32_inc_i_2); delete fwdref_190;
+ fwdref_121->replaceAllUsesWith(packed_tmp31_i_133); delete fwdref_121;
+ fwdref_119->replaceAllUsesWith(int32_indvar_next7); delete fwdref_119;
}
diff --git a/src/mesa/pipe/llvm/llvm_builtins.c b/src/mesa/pipe/llvm/llvm_builtins.c
index 517aa2e84b..4f98d754ba 100644
--- a/src/mesa/pipe/llvm/llvm_builtins.c
+++ b/src/mesa/pipe/llvm/llvm_builtins.c
@@ -32,7 +32,6 @@
*/
typedef __attribute__(( ocu_vector_type(4) )) float float4;
-
extern float powf(float a, float b);
inline float approx(float a, float b)
@@ -106,3 +105,11 @@ inline float4 vsin(float4 val)
result.w = res;
return result;
}
+
+inline int kilp(float4 val)
+{
+ if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0)
+ return 1;
+ else
+ return 0;
+}
diff --git a/src/mesa/pipe/llvm/llvm_entry.c b/src/mesa/pipe/llvm/llvm_entry.c
index fe32e7810d..909bef340a 100644
--- a/src/mesa/pipe/llvm/llvm_entry.c
+++ b/src/mesa/pipe/llvm/llvm_entry.c
@@ -1,4 +1,4 @@
-/* clang --emit-llvm llvm_builtins.c |llvm-as |opt -std-compile-opts |llvm2cpp -for=Shader -gen-module -funcname=createBaseShader */
+/* clang --emit-llvm llvm_entry.c |llvm-as |opt -std-compile-opts |llvm2cpp -for=Shader -gen-module -funcname=createBaseShader */
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
@@ -68,6 +68,7 @@ compute_clipmask(float4 clip, float4 (*plane), unsigned nr)
return mask;
}
+
inline void collect_results(float4 *results, struct vertex_header *vOut,
float4 *planes, int nr_planes,
float4 scale, float4 trans,
@@ -76,7 +77,6 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
/* store results */
unsigned slot;
float x, y, z, w;
-
/* Handle attr[0] (position) specially:
*/
float4 res0 = results[0];
@@ -85,7 +85,6 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
y = clip[1] = res0.y;
z = clip[2] = res0.z;
w = clip[3] = res0.w;
-
vOut->clipmask = compute_clipmask(res0, planes, nr_planes);
vOut->edgeflag = 1;
@@ -164,31 +163,39 @@ void to_array(float (*dests)[4], float4 *in, int num_attribs)
}
}
-extern void execute_shader(float4 dests[16], float4 inputs[16],
- float4 consts[32], float4 temps[128]);
-void run_vertex_shader(float (*ainputs)[16][4],
- float (*dests)[16][4],
+struct ShaderInput
+{
+ float4 *dests;
+ float4 *inputs;
+ float4 *temps;
+ float4 *consts;
+ int kilmask;
+};
+
+extern void execute_shader(struct ShaderInput *input);
+
+void run_vertex_shader(float4 (*inputs)[16],
+ float4 (*results)[16],
float (*aconsts)[4],
int num_vertices,
int num_inputs,
int num_attribs,
int num_consts)
{
- float4 inputs[16*32*4][16];
float4 consts[32];
- float4 results[16*32*4][16];
float4 temps[128];//MAX_PROGRAM_TEMPS
+ struct ShaderInput args;
/*printf("XXX LLVM run_vertex_shader vertices = %d, inputs = %d, attribs = %d, consts = %d\n",
num_vertices, num_inputs, num_attribs, num_consts);*/
- from_array(inputs, ainputs, num_vertices, num_inputs);
from_consts(consts, aconsts, num_consts);
+ args.consts = consts;
+ args.temps = temps;
for (int i = 0; i < num_vertices; ++i) {
- float4 *in = inputs[i];
- float4 *res = results[i];
- execute_shader(res, in, consts, temps);
- to_array(dests[i], res, num_attribs);
+ args.dests = results[i];
+ args.inputs = inputs[i];
+ execute_shader(&args);
}
}
@@ -213,30 +220,33 @@ struct tgsi_sampler
struct softpipe_tile_cache *cache;
};
+
int run_fragment_shader(float x, float y,
- float (*dests)[16][4],
- float (*ainputs)[16][4],
+ float4 (*results)[16],
+ float4 (*inputs)[16],
int num_inputs,
float (*aconsts)[4],
int num_consts,
- struct tgsi_sampler *samplers,
- unsigned *sampler_units)
+ struct tgsi_sampler *samplers)
{
- float4 inputs[4][16];
float4 consts[32];
- float4 results[4][16];
float4 temps[128];//MAX_PROGRAM_TEMPS
- int kilmask = 0;
+ struct ShaderInput args;
+ int mask = 0;
+ args.kilmask = 0;
- from_array(inputs, ainputs, 4, num_inputs);
from_consts(consts, aconsts, num_consts);
+ args.consts = consts;
+ args.temps = temps;
//printf("AAAAAAAAAAAAAAAAAAAAAAA FRAGMENT SHADER %f %f\n", x, y);
for (int i = 0; i < 4; ++i) {
- float4 *in = inputs[i];
- float4 *res = results[i];
- execute_shader(res, in, consts, temps);
- to_array(dests[i], res, 2);
+ args.inputs = inputs[i];
+ args.dests = results[i];
+ mask = args.kilmask;
+ args.kilmask = 0;
+ execute_shader(&args);
+ args.kilmask = mask | (args.kilmask << i);
}
- return ~kilmask;
+ return ~args.kilmask;
}
diff --git a/src/mesa/pipe/llvm/storage.cpp b/src/mesa/pipe/llvm/storage.cpp
index 1715bd4de4..08b9d29a24 100644
--- a/src/mesa/pipe/llvm/storage.cpp
+++ b/src/mesa/pipe/llvm/storage.cpp
@@ -33,7 +33,7 @@
#include "storage.h"
-#include "pipe/tgsi/exec/tgsi_token.h"
+#include "pipe/p_shader_tokens.h"
#include <llvm/BasicBlock.h>
#include <llvm/Module.h>
#include <llvm/Value.h>
@@ -46,12 +46,10 @@
using namespace llvm;
-Storage::Storage(llvm::BasicBlock *block, llvm::Value *out,
- llvm::Value *in, llvm::Value *consts, llvm::Value *temps)
- : m_block(block), m_OUT(out),
- m_IN(in), m_CONST(consts), m_TEMPS(temps),
+Storage::Storage(llvm::BasicBlock *block, llvm::Value *input)
+ : m_block(block),
+ m_INPUT(input),
m_addrs(32),
- m_dstCache(32),
m_idx(0)
{
m_floatVecType = VectorType::get(Type::FloatTy, 4);
@@ -112,26 +110,8 @@ llvm::ConstantInt *Storage::constantInt(int idx)
llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx)
{
- GetElementPtrInst *getElem = 0;
-
- if (indIdx) {
- getElem = new GetElementPtrInst(m_IN,
- BinaryOperator::create(Instruction::Add,
- indIdx,
- constantInt(idx),
- name("add"),
- m_block),
- name("input_ptr"),
- m_block);
- } else {
- getElem = new GetElementPtrInst(m_IN,
- constantInt(idx),
- name("input_ptr"),
- m_block);
- }
-
- LoadInst *load = new LoadInst(getElem, name("input"),
- false, m_block);
+ Value *val = element(InputsArg, idx, indIdx);
+ LoadInst *load = new LoadInst(val, name("input"), false, m_block);
load->setAlignment(8);
return load;
@@ -141,24 +121,8 @@ llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx)
{
m_numConsts = ((idx + 1) > m_numConsts) ? (idx + 1) : m_numConsts;
- GetElementPtrInst *getElem = 0;
-
- if (indIdx)
- getElem = new GetElementPtrInst(m_CONST,
- BinaryOperator::create(Instruction::Add,
- indIdx,
- constantInt(idx),
- name("add"),
- m_block),
- name("const_ptr"),
- m_block);
- else
- getElem = new GetElementPtrInst(m_CONST,
- constantInt(idx),
- name("const_ptr"),
- m_block);
- LoadInst *load = new LoadInst(getElem, name("const"),
- false, m_block);
+ Value *elem = element(ConstsArg, idx, indIdx);
+ LoadInst *load = new LoadInst(elem, name("const"), false, m_block);
load->setAlignment(8);
return load;
}
@@ -175,26 +139,9 @@ llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle)
llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx)
{
- GetElementPtrInst *getElem = 0;
-
- if (indIdx) {
- getElem = new GetElementPtrInst(m_TEMPS,
- BinaryOperator::create(Instruction::Add,
- indIdx,
- constantInt(idx),
- name("add"),
- m_block),
- name("temp_ptr"),
- m_block);
- } else {
- getElem = new GetElementPtrInst(m_TEMPS,
- constantInt(idx),
- name("temp_ptr"),
- m_block);
- }
+ Value *elem = element(TempsArg, idx, indIdx);
- LoadInst *load = new LoadInst(getElem, name("temp"),
- false, m_block);
+ LoadInst *load = new LoadInst(elem, name("temp"), false, m_block);
load->setAlignment(8);
return load;
@@ -208,11 +155,8 @@ void Storage::setTempElement(int idx, llvm::Value *val, int mask)
templ = tempElement(idx);
val = maskWrite(val, mask, templ);
}
- GetElementPtrInst *getElem = new GetElementPtrInst(m_TEMPS,
- constantInt(idx),
- name("temp_ptr"),
- m_block);
- StoreInst *st = new StoreInst(val, getElem, false, m_block);
+ Value *elem = element(TempsArg, idx);
+ StoreInst *st = new StoreInst(val, elem, false, m_block);
st->setAlignment(8);
m_tempWriteMap[idx] = true;
}
@@ -226,11 +170,8 @@ void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask)
val = maskWrite(val, mask, templ);
}
- GetElementPtrInst *getElem = new GetElementPtrInst(m_OUT,
- constantInt(dstIdx),
- name("out_ptr"),
- m_block);
- StoreInst *st = new StoreInst(val, getElem, false, m_block);
+ Value *elem = element(DestsArg, dstIdx);
+ StoreInst *st = new StoreInst(val, elem, false, m_block);
st->setAlignment(8);
m_destWriteMap[dstIdx] = true;
}
@@ -310,26 +251,8 @@ void Storage::setCurrentBlock(llvm::BasicBlock *block)
llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx)
{
- GetElementPtrInst *getElem = 0;
-
- if (indIdx) {
- getElem = new GetElementPtrInst(m_OUT,
- BinaryOperator::create(Instruction::Add,
- indIdx,
- constantInt(idx),
- name("add"),
- m_block),
- name("output_ptr"),
- m_block);
- } else {
- getElem = new GetElementPtrInst(m_OUT,
- constantInt(idx),
- name("output_ptr"),
- m_block);
- }
-
- LoadInst *load = new LoadInst(getElem, name("output"),
- false, m_block);
+ Value *elem = element(DestsArg, idx, indIdx);
+ LoadInst *load = new LoadInst(elem, name("output"), false, m_block);
load->setAlignment(8);
return load;
@@ -337,47 +260,19 @@ llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx)
llvm::Value * Storage::inputPtr() const
{
- return m_IN;
-}
-
-llvm::Value * Storage::outputPtr() const
-{
- return m_OUT;
+ return m_INPUT;
}
-llvm::Value * Storage::constPtr() const
+void Storage::pushArguments(llvm::Value *input)
{
- return m_CONST;
-}
+ m_argStack.push(m_INPUT);
-llvm::Value * Storage::tempPtr() const
-{
- return m_TEMPS;
-}
-
-void Storage::pushArguments(llvm::Value *out, llvm::Value *in,
- llvm::Value *constPtr, llvm::Value *temp)
-{
- Args arg;
- arg.out = m_OUT;
- arg.in = m_IN;
- arg.cst = m_CONST;
- arg.temp = m_TEMPS;
- m_argStack.push(arg);
-
- m_OUT = out;
- m_IN = in;
- m_CONST = constPtr;
- m_TEMPS = temp;
+ m_INPUT = input;
}
void Storage::popArguments()
{
- Args arg = m_argStack.top();
- m_OUT = arg.out;
- m_IN = arg.in;
- m_CONST = arg.cst;
- m_TEMPS = arg.temp;
+ m_INPUT = m_argStack.top();
m_argStack.pop();
}
@@ -405,4 +300,63 @@ void Storage::addImmediate(float *val)
m_immediates.push_back(ConstantVector::get(m_floatVecType, vec));
}
+
+llvm::Value * Storage::elemPtr(Args arg)
+{
+ std::vector<Value*> indices;
+ indices.push_back(constantInt(0));
+ indices.push_back(constantInt(static_cast<int>(arg)));
+ GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT,
+ indices.begin(),
+ indices.end(),
+ name("input_ptr"),
+ m_block);
+ return new LoadInst(getElem, name("input_field"), false, m_block);
+}
+
+llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
+ llvm::Value *indIdx )
+{
+ GetElementPtrInst *getElem = 0;
+
+ if (indIdx) {
+ getElem = new GetElementPtrInst(ptr,
+ BinaryOperator::create(Instruction::Add,
+ indIdx,
+ constantInt(idx),
+ name("add"),
+ m_block),
+ name("field"),
+ m_block);
+ } else {
+ getElem = new GetElementPtrInst(ptr,
+ constantInt(idx),
+ name("field"),
+ m_block);
+ }
+ return getElem;
+}
+
+llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx )
+{
+ Value *val = elemPtr(arg);
+ return elemIdx(val, idx, indIdx);
+}
+
+void Storage::setKilElement(llvm::Value *val)
+{
+ std::vector<Value*> indices;
+ indices.push_back(constantInt(0));
+ indices.push_back(constantInt(static_cast<int>(KilArg)));
+ GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT,
+ indices.begin(),
+ indices.end(),
+ name("kil_ptr"),
+ m_block);
+ StoreInst *st = new StoreInst(val, elem, false, m_block);
+ st->setAlignment(8);
+}
+
#endif //MESA_LLVM
+
+
diff --git a/src/mesa/pipe/llvm/storage.h b/src/mesa/pipe/llvm/storage.h
index aa02f02f85..8574f7554e 100644
--- a/src/mesa/pipe/llvm/storage.h
+++ b/src/mesa/pipe/llvm/storage.h
@@ -51,13 +51,9 @@ class Storage
{
public:
Storage(llvm::BasicBlock *block,
- llvm::Value *out,
- llvm::Value *in, llvm::Value *consts, llvm::Value *temps);
+ llvm::Value *input);
llvm::Value *inputPtr() const;
- llvm::Value *outputPtr() const;
- llvm::Value *constPtr() const;
- llvm::Value *tempPtr() const;
void setCurrentBlock(llvm::BasicBlock *block);
@@ -75,14 +71,15 @@ public:
llvm::Value *addrElement(int idx) const;
void setAddrElement(int idx, llvm::Value *val, int mask);
+ void setKilElement(llvm::Value *val);
+
llvm::Value *shuffleVector(llvm::Value *vec, int shuffle);
llvm::Value *extractIndex(llvm::Value *vec);
int numConsts() const;
- void pushArguments(llvm::Value *out, llvm::Value *in,
- llvm::Value *constPtr, llvm::Value *temp);
+ void pushArguments(llvm::Value *input);
void popArguments();
void pushTemps();
void popTemps();
@@ -93,17 +90,25 @@ private:
llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ);
const char *name(const char *prefix);
+ enum Args {
+ DestsArg = 0,
+ InputsArg = 1,
+ TempsArg = 2,
+ ConstsArg = 3,
+ KilArg = 4
+ };
+ llvm::Value *elemPtr(Args arg);
+ llvm::Value *elemIdx(llvm::Value *ptr, int idx,
+ llvm::Value *indIdx = 0);
+ llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0);
+
private:
llvm::BasicBlock *m_block;
- llvm::Value *m_OUT;
- llvm::Value *m_IN;
- llvm::Value *m_CONST;
- llvm::Value *m_TEMPS;
+ llvm::Value *m_INPUT;
std::map<int, llvm::ConstantInt*> m_constInts;
std::map<int, llvm::Constant*> m_intVecs;
std::vector<llvm::Value*> m_addrs;
- std::vector<llvm::Value*> m_dstCache;
std::vector<llvm::Constant*> m_immediates;
llvm::VectorType *m_floatVecType;
@@ -121,13 +126,7 @@ private:
llvm::Value *m_undefIntVec;
llvm::Value *m_extSwizzleVec;
- struct Args {
- llvm::Value *out;
- llvm::Value *in;
- llvm::Value *cst;
- llvm::Value *temp;
- };
- std::stack<Args> m_argStack;
+ std::stack<llvm::Value*> m_argStack;
std::stack<std::vector<llvm::Value*> > m_tempStack;
};
diff --git a/src/mesa/pipe/p_compiler.h b/src/mesa/pipe/p_compiler.h
index 020977b05b..4f2c9ef88a 100644
--- a/src/mesa/pipe/p_compiler.h
+++ b/src/mesa/pipe/p_compiler.h
@@ -73,9 +73,11 @@ typedef unsigned long long uint64;
#if defined __GNUC__
#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) ))
#define ALIGN16_ASSIGN(NAME) NAME##___aligned
+#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) ))
#else
#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1]
#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned)
+#define ALIGN16_ATTRIB
#endif
diff --git a/src/mesa/pipe/p_context.h b/src/mesa/pipe/p_context.h
index 00379fbacf..92ca7dd8e3 100644
--- a/src/mesa/pipe/p_context.h
+++ b/src/mesa/pipe/p_context.h
@@ -29,9 +29,13 @@
#define PIPE_CONTEXT_H
#include "p_state.h"
+#include <stdint.h>
struct pipe_state_cache;
+/* Opaque driver handles:
+ */
+struct pipe_query;
/**
* Gallium rendering context. Basically:
@@ -81,9 +85,19 @@ struct pipe_context {
/**
* Query objects
*/
- void (*begin_query)(struct pipe_context *pipe, struct pipe_query_object *q);
- void (*end_query)(struct pipe_context *pipe, struct pipe_query_object *q);
- void (*wait_query)(struct pipe_context *pipe, struct pipe_query_object *q);
+ struct pipe_query *(*create_query)( struct pipe_context *pipe,
+ unsigned query_type );
+
+ void (*destroy_query)(struct pipe_context *pipe,
+ struct pipe_query *q);
+
+ void (*begin_query)(struct pipe_context *pipe, struct pipe_query *q);
+ void (*end_query)(struct pipe_context *pipe, struct pipe_query *q);
+
+ boolean (*get_query_result)(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ uint64_t *result);
/*
* State functions
@@ -132,31 +146,25 @@ struct pipe_context {
void (*set_clip_state)( struct pipe_context *,
const struct pipe_clip_state * );
- void (*set_clear_color_state)( struct pipe_context *,
- const struct pipe_clear_color_state * );
-
void (*set_constant_buffer)( struct pipe_context *,
uint shader, uint index,
const struct pipe_constant_buffer *buf );
- void (*set_feedback_state)( struct pipe_context *,
- const struct pipe_feedback_state *);
-
void (*set_framebuffer_state)( struct pipe_context *,
const struct pipe_framebuffer_state * );
void (*set_polygon_stipple)( struct pipe_context *,
const struct pipe_poly_stipple * );
- void (*set_sampler_units)( struct pipe_context *,
- uint num_samplers, const uint *units );
-
void (*set_scissor_state)( struct pipe_context *,
const struct pipe_scissor_state * );
- void (*set_texture_state)( struct pipe_context *,
- unsigned unit,
- struct pipe_texture * );
+
+ /* Currently a sampler is constrained to sample from a single texture:
+ */
+ void (*set_sampler_texture)( struct pipe_context *,
+ unsigned sampler,
+ struct pipe_texture * );
void (*set_viewport_state)( struct pipe_context *,
const struct pipe_viewport_state * );
@@ -172,12 +180,6 @@ struct pipe_context {
unsigned index,
const struct pipe_vertex_element * );
- /*
- * Vertex feedback
- */
- void (*set_feedback_buffer)(struct pipe_context *,
- unsigned index,
- const struct pipe_feedback_buffer *);
/** Get a surface which is a "view" into a texture */
struct pipe_surface *(*get_tex_surface)(struct pipe_context *pipe,
diff --git a/src/mesa/pipe/p_format.h b/src/mesa/pipe/p_format.h
index 8f11bfab76..95a50c18af 100644
--- a/src/mesa/pipe/p_format.h
+++ b/src/mesa/pipe/p_format.h
@@ -366,4 +366,51 @@ static INLINE char *pf_sprint_name( char *str, uint format )
return str;
}
+static INLINE uint pf_get_component_bits( uint format, uint comp )
+{
+ uint size;
+
+ if (pf_swizzle_x(format) == comp) {
+ size = pf_size_x(format);
+ }
+ else if (pf_swizzle_y(format) == comp) {
+ size = pf_size_y(format);
+ }
+ else if (pf_swizzle_z(format) == comp) {
+ size = pf_size_z(format);
+ }
+ else if (pf_swizzle_w(format) == comp) {
+ size = pf_size_w(format);
+ }
+ else {
+ size = 0;
+ }
+ return size << (pf_exp8(format) * 3);
+}
+
+static INLINE uint pf_get_bits( uint format )
+{
+ if (pf_layout(format) == PIPE_FORMAT_LAYOUT_RGBAZS) {
+ return
+ pf_get_component_bits( format, PIPE_FORMAT_COMP_R ) +
+ pf_get_component_bits( format, PIPE_FORMAT_COMP_G ) +
+ pf_get_component_bits( format, PIPE_FORMAT_COMP_B ) +
+ pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) +
+ pf_get_component_bits( format, PIPE_FORMAT_COMP_Z ) +
+ pf_get_component_bits( format, PIPE_FORMAT_COMP_S );
+ }
+ else {
+ assert( pf_layout(format) == PIPE_FORMAT_LAYOUT_YCBCR );
+
+ /* TODO */
+ assert( 0 );
+ return 0;
+ }
+}
+
+static INLINE uint pf_get_size( uint format ) {
+ assert(pf_get_bits(format) % 8 == 0);
+ return pf_get_bits(format) / 8;
+}
+
#endif
diff --git a/src/mesa/pipe/p_state.h b/src/mesa/pipe/p_state.h
index 6db9bbc953..4e42838f1d 100644
--- a/src/mesa/pipe/p_state.h
+++ b/src/mesa/pipe/p_state.h
@@ -103,19 +103,6 @@ struct pipe_rasterizer_state
};
-/**
- * Post-transform vertex feeback
- */
-struct pipe_feedback_state {
- uint enabled:1; /**< enable feedback? */
- uint discard:1; /**< discard primitives? */
- uint interleaved:1; /**< interleaved output? */
- uint num_attribs;
- uint attrib[PIPE_MAX_FEEDBACK_ATTRIBS];
- uint size[PIPE_MAX_FEEDBACK_ATTRIBS];
-};
-
-
struct pipe_poly_stipple {
unsigned stipple[32];
};
@@ -165,7 +152,6 @@ struct pipe_depth_stencil_state
unsigned writemask:1; /**< allow depth buffer writes? */
unsigned func:3; /**< depth test func (PIPE_FUNC_x) */
unsigned occlusion_count:1; /**< XXX move this elsewhere? */
- float clear; /**< Clear value in [0,1] (XXX correct place?) */
} depth;
struct {
unsigned front_enabled:1;
@@ -181,7 +167,6 @@ struct pipe_depth_stencil_state
ubyte ref_value[2]; /**< [0] = front, [1] = back */
ubyte value_mask[2];
ubyte write_mask[2];
- ubyte clear_value;
} stencil;
};
@@ -213,11 +198,6 @@ struct pipe_blend_color {
float color[4];
};
-struct pipe_clear_color_state
-{
- float color[4];
-};
-
struct pipe_framebuffer_state
{
/** multiple colorbuffers for multiple render targets */
@@ -284,10 +264,9 @@ struct pipe_texture
{
/* Effectively the key:
*/
- unsigned target; /* XXX convert to PIPE_TEXTURE_x */
- unsigned internal_format; /* XXX convert to PIPE_FORMAT_x */
-
+ unsigned target; /**< PIPE_TEXTURE_x */
enum pipe_format format; /**< PIPE_FORMAT_x */
+
unsigned first_level;
unsigned last_level;
@@ -336,24 +315,5 @@ struct pipe_vertex_element
};
-/**
- * Vertex feedback buffer
- */
-struct pipe_feedback_buffer {
- struct pipe_buffer_handle *buffer;
- unsigned size;
- unsigned start_offset;
-};
-
-
-/**
- * Hardware queries (occlusion, transform feedback, timing, etc)
- */
-struct pipe_query_object {
- uint type:3; /**< PIPE_QUERY_x */
- uint ready:1; /**< is result ready? */
- uint64 count;
-};
-
#endif
diff --git a/src/mesa/pipe/p_util.h b/src/mesa/pipe/p_util.h
index 3c5e98453c..e6d284d932 100644
--- a/src/mesa/pipe/p_util.h
+++ b/src/mesa/pipe/p_util.h
@@ -30,6 +30,8 @@
#include "p_compiler.h"
#include <math.h>
+#include <stdint.h>
+
#ifdef WIN32
@@ -93,7 +95,7 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
#define GETENV( X ) NULL
-#else // WIN32
+#else /* WIN32 */
#define MALLOC( SIZE ) malloc( SIZE )
@@ -105,10 +107,55 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
#define GETENV( X ) getenv( X )
-#endif // WIN32
+#endif /* WIN32 */
#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T))
+
+
+/**
+ * Return memory on given byte alignment
+ */
+static INLINE void *
+align_malloc(size_t bytes, uint alignment)
+{
+#if defined(HAVE_POSIX_MEMALIGN)
+ void *mem;
+ (void) posix_memalign(& mem, alignment, bytes);
+ return mem;
+#else
+ uintptr_t ptr, buf;
+
+ assert( alignment > 0 );
+
+ ptr = (uintptr_t) MALLOC(bytes + alignment + sizeof(void *));
+ if (!ptr)
+ return NULL;
+
+ buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
+ *(uintptr_t *)(buf - sizeof(void *)) = ptr;
+
+ return (void *) buf;
+#endif /* defined(HAVE_POSIX_MEMALIGN) */
+}
+
+/**
+ * Free memory returned by align_malloc().
+ */
+static INLINE void
+align_free(void *ptr)
+{
+#if defined(HAVE_POSIX_MEMALIGN)
+ FREE(ptr);
+#else
+ void **cubbyHole = (void **) ((char *) ptr - sizeof(void *));
+ void *realAddr = *cubbyHole;
+ FREE(realAddr);
+#endif /* defined(HAVE_POSIX_MEMALIGN) */
+}
+
+
+
#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) )
#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) )
@@ -122,13 +169,25 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
static INLINE void *
align16( void *unaligned )
{
- union {
- void *p;
- uint64 u;
- } pu;
- pu.p = unaligned;
- pu.u = (pu.u + 15) & ~15;
- return pu.p;
+ if (sizeof(void *) == 64) {
+ union {
+ void *p;
+ uint64 u;
+ } pu;
+ pu.p = unaligned;
+ pu.u = (pu.u + 15) & ~15;
+ return pu.p;
+ }
+ else {
+ /* 32-bit pointers */
+ union {
+ void *p;
+ uint u;
+ } pu;
+ pu.p = unaligned;
+ pu.u = (pu.u + 15) & ~15;
+ return pu.p;
+ }
}
diff --git a/src/mesa/pipe/p_winsys.h b/src/mesa/pipe/p_winsys.h
index 1418af6918..aa9362ec0b 100644
--- a/src/mesa/pipe/p_winsys.h
+++ b/src/mesa/pipe/p_winsys.h
@@ -76,16 +76,16 @@ struct pipe_winsys
const char *, ... );
- /**
- * flags is bitmask of PIPE_SURFACE_FLAG_RENDER, PIPE_SURFACE_FLAG_TEXTURE
- */
- unsigned (*surface_pitch)(struct pipe_winsys *ws, unsigned cpp,
- unsigned with, unsigned flags);
-
/** allocate a new surface (no context dependency) */
- struct pipe_surface *(*surface_alloc)(struct pipe_winsys *ws,
- enum pipe_format format);
-
+ struct pipe_surface *(*surface_alloc)(struct pipe_winsys *ws);
+
+ /** allocate storage for a pipe_surface */
+ int (*surface_alloc_storage)(struct pipe_winsys *ws,
+ struct pipe_surface *surf,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned flags);
+
void (*surface_release)(struct pipe_winsys *ws, struct pipe_surface **s);
diff --git a/src/mesa/pipe/softpipe/Makefile b/src/mesa/pipe/softpipe/Makefile
index 647cc05373..31438a882e 100644
--- a/src/mesa/pipe/softpipe/Makefile
+++ b/src/mesa/pipe/softpipe/Makefile
@@ -7,6 +7,7 @@ LIBNAME = softpipe
DRIVER_SOURCES = \
sp_clear.c \
sp_flush.c \
+ sp_query.c \
sp_context.c \
sp_draw_arrays.c \
sp_prim_setup.c \
@@ -27,7 +28,6 @@ DRIVER_SOURCES = \
sp_state_blend.c \
sp_state_clip.c \
sp_state_derived.c \
- sp_state_feedback.c \
sp_state_fs.c \
sp_state_sampler.c \
sp_state_rasterizer.c \
diff --git a/src/mesa/pipe/softpipe/sp_context.c b/src/mesa/pipe/softpipe/sp_context.c
index 8b8e04c2f9..107c8f8597 100644
--- a/src/mesa/pipe/softpipe/sp_context.c
+++ b/src/mesa/pipe/softpipe/sp_context.c
@@ -42,6 +42,7 @@
#include "sp_tile_cache.h"
#include "sp_texture.h"
#include "sp_winsys.h"
+#include "sp_query.h"
@@ -150,37 +151,6 @@ static void softpipe_destroy( struct pipe_context *pipe )
}
-static void
-softpipe_begin_query(struct pipe_context *pipe, struct pipe_query_object *q)
-{
- struct softpipe_context *softpipe = softpipe_context( pipe );
- assert(q->type < PIPE_QUERY_TYPES);
- assert(!softpipe->queries[q->type]);
- softpipe->queries[q->type] = q;
-}
-
-
-static void
-softpipe_end_query(struct pipe_context *pipe, struct pipe_query_object *q)
-{
- struct softpipe_context *softpipe = softpipe_context( pipe );
- assert(q->type < PIPE_QUERY_TYPES);
- assert(softpipe->queries[q->type]);
- q->ready = 1; /* software rendering is synchronous */
- softpipe->queries[q->type] = NULL;
-}
-
-
-static void
-softpipe_wait_query(struct pipe_context *pipe, struct pipe_query_object *q)
-{
- /* Should never get here since we indicated that the result was
- * ready in softpipe_end_query().
- */
- assert(0);
-}
-
-
static const char *softpipe_get_name( struct pipe_context *pipe )
{
return "softpipe";
@@ -304,19 +274,15 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys,
softpipe->pipe.set_blend_color = softpipe_set_blend_color;
softpipe->pipe.set_clip_state = softpipe_set_clip_state;
- softpipe->pipe.set_clear_color_state = softpipe_set_clear_color_state;
softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer;
- softpipe->pipe.set_feedback_state = softpipe_set_feedback_state;
softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state;
softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple;
- softpipe->pipe.set_sampler_units = softpipe_set_sampler_units;
softpipe->pipe.set_scissor_state = softpipe_set_scissor_state;
- softpipe->pipe.set_texture_state = softpipe_set_texture_state;
+ softpipe->pipe.set_sampler_texture = softpipe_set_sampler_texture;
softpipe->pipe.set_viewport_state = softpipe_set_viewport_state;
softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer;
softpipe->pipe.set_vertex_element = softpipe_set_vertex_element;
- softpipe->pipe.set_feedback_buffer = softpipe_set_feedback_buffer;
softpipe->pipe.draw_arrays = softpipe_draw_arrays;
softpipe->pipe.draw_elements = softpipe_draw_elements;
@@ -324,9 +290,7 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys,
softpipe->pipe.clear = softpipe_clear;
softpipe->pipe.flush = softpipe_flush;
- softpipe->pipe.begin_query = softpipe_begin_query;
- softpipe->pipe.end_query = softpipe_end_query;
- softpipe->pipe.wait_query = softpipe_wait_query;
+ softpipe_init_query_funcs( softpipe );
/* textures */
softpipe->pipe.texture_create = softpipe_texture_create;
diff --git a/src/mesa/pipe/softpipe/sp_context.h b/src/mesa/pipe/softpipe/sp_context.h
index b97cdc52c6..2c038de5f7 100644
--- a/src/mesa/pipe/softpipe/sp_context.h
+++ b/src/mesa/pipe/softpipe/sp_context.h
@@ -82,10 +82,8 @@ struct softpipe_context {
const struct sp_vertex_shader_state *vs;
struct pipe_blend_color blend_color;
- struct pipe_clear_color_state clear_color;
struct pipe_clip_state clip;
struct pipe_constant_buffer constants[2];
- struct pipe_feedback_state feedback;
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
@@ -93,13 +91,12 @@ struct softpipe_context {
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX];
- uint sampler_units[PIPE_MAX_SAMPLERS];
unsigned dirty;
- /*
- * Active queries
+ /* Counter for occlusion queries. Note this supports overlapping
+ * queries.
*/
- struct pipe_query_object *queries[PIPE_QUERY_TYPES];
+ uint64_t occlusion_count;
/*
* Mapped vertex buffers
@@ -117,9 +114,6 @@ struct softpipe_context {
boolean need_w; /**< produce quad/fragment W values? */
int psize_slot;
- /** Feedback buffers */
- struct pipe_feedback_buffer feedback_buffer[PIPE_MAX_FEEDBACK_ATTRIBS];
-
#if 0
/* Stipple derived state:
*/
diff --git a/src/mesa/pipe/softpipe/sp_draw_arrays.c b/src/mesa/pipe/softpipe/sp_draw_arrays.c
index 93eb68405d..b7626f8a5f 100644
--- a/src/mesa/pipe/softpipe/sp_draw_arrays.c
+++ b/src/mesa/pipe/softpipe/sp_draw_arrays.c
@@ -138,18 +138,6 @@ softpipe_draw_elements(struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- /* Map feedback buffers if enabled */
- if (sp->feedback.enabled) {
- const uint n = sp->feedback.interleaved ? 1 : sp->feedback.num_attribs;
- for (i = 0; i < n; i++) {
- void *ptr = pipe->winsys->buffer_map(pipe->winsys,
- sp->feedback_buffer[i].buffer,
- PIPE_BUFFER_FLAG_WRITE);
- draw_set_mapped_feedback_buffer(draw, i, ptr,
- sp->feedback_buffer[i].size);
- }
- }
-
/* draw! */
draw_arrays(draw, mode, start, count);
@@ -171,16 +159,6 @@ softpipe_draw_elements(struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- /* Unmap feedback buffers if enabled */
- if (sp->feedback.enabled) {
- const uint n = sp->feedback.interleaved ? 1 : sp->feedback.num_attribs;
- for (i = 0; i < n; i++) {
- pipe->winsys->buffer_unmap(pipe->winsys,
- sp->feedback_buffer[i].buffer);
- draw_set_mapped_feedback_buffer(draw, i, NULL, 0);
- }
- }
-
/* Note: leave drawing surfaces mapped */
softpipe_unmap_constant_buffers(sp);
diff --git a/src/mesa/pipe/softpipe/sp_prim_setup.c b/src/mesa/pipe/softpipe/sp_prim_setup.c
index 6980564bdc..fc96f92af1 100644
--- a/src/mesa/pipe/softpipe/sp_prim_setup.c
+++ b/src/mesa/pipe/softpipe/sp_prim_setup.c
@@ -47,11 +47,11 @@
* Triangle edge info
*/
struct edge {
- float dx; /**< X(v1) - X(v0), used only during setup */
- float dy; /**< Y(v1) - Y(v0), used only during setup */
+ float dx; /**< X(v1) - X(v0), used only during setup */
+ float dy; /**< Y(v1) - Y(v0), used only during setup */
float dxdy; /**< dx/dy */
- float sx, sy; /**< first sample point coord */
- int lines; /**< number of lines on this edge */
+ float sx, sy; /**< first sample point coord */
+ int lines; /**< number of lines on this edge */
};
@@ -178,10 +178,9 @@ static INLINE int block( int x )
* this is pretty nasty... may need to rework flush_spans again to
* fix it, if possible.
*/
-static unsigned calculate_mask( struct setup_stage *setup,
- int x )
+static unsigned calculate_mask( struct setup_stage *setup, int x )
{
- unsigned mask = 0;
+ unsigned mask = 0x0;
if (x >= setup->span.left[0] && x < setup->span.right[0])
mask |= MASK_TOP_LEFT;
@@ -207,18 +206,21 @@ static void flush_spans( struct setup_stage *setup )
int minleft, maxright;
int x;
- switch (setup->span.y_flags) {
- case 3:
+ switch (setup->span.y_flags) {
+ case 0x3:
+ /* both odd and even lines written (both quad rows) */
minleft = MIN2(setup->span.left[0], setup->span.left[1]);
maxright = MAX2(setup->span.right[0], setup->span.right[1]);
break;
- case 1:
+ case 0x1:
+ /* only even line written (quad top row) */
minleft = setup->span.left[0];
maxright = setup->span.right[0];
break;
- case 2:
+ case 0x2:
+ /* only odd line written (quad bottom row) */
minleft = setup->span.left[1];
maxright = setup->span.right[1];
break;
@@ -227,12 +229,12 @@ static void flush_spans( struct setup_stage *setup )
return;
}
-
- for (x = block(minleft); x <= block(maxright); )
- {
+ /* XXX this loop could be moved into the above switch cases and
+ * calculate_mask() could be simplified a bit...
+ */
+ for (x = block(minleft); x <= block(maxright); x += 2) {
emit_quad( setup, x, setup->span.y,
calculate_mask( setup, x ) );
- x += 2;
}
setup->span.y = 0;
@@ -593,7 +595,8 @@ static void subtriangle( struct setup_stage *setup,
setup->span.y = block(_y);
}
- setup->span.left[_y&1] = left;setup->span.right[_y&1] = right;
+ setup->span.left[_y&1] = left;
+ setup->span.right[_y&1] = right;
setup->span.y_flags |= 1<<(_y&1);
}
}
diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c
index 75576a9bde..251b47341a 100644
--- a/src/mesa/pipe/softpipe/sp_quad_fs.c
+++ b/src/mesa/pipe/softpipe/sp_quad_fs.c
@@ -98,7 +98,6 @@ shade_quad(
/* Consts does not require 16 byte alignment. */
machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
- machine->SamplerUnits = softpipe->sampler_units;
machine->InterpCoefs = quad->coef;
machine->Inputs[0].xyzw[0].f[0] = fx;
@@ -174,12 +173,11 @@ shade_quad_llvm(struct quad_stage *qs,
{
struct quad_shade_stage *qss = quad_shade_stage(qs);
struct softpipe_context *softpipe = qs->softpipe;
- float dests[4][16][4];
+ float dests[4][16][4] ALIGN16_ATTRIB;
+ float inputs[4][16][4] ALIGN16_ATTRIB;
const float fx = (float) quad->x0;
const float fy = (float) quad->y0;
struct gallivm_prog *llvm = qss->llvm_prog;
- float inputs[4][16][4];
- memset(inputs, 0, sizeof(inputs));
inputs[0][0][0] = fx;
inputs[1][0][0] = fx + 1.0f;
@@ -203,10 +201,10 @@ shade_quad_llvm(struct quad_stage *qs,
}
#endif
- /*quad->mask &=*/
+ quad->mask &=
gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs,
softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
- qss->samplers, softpipe->sampler_units);
+ qss->samplers);
#if DLLVM
printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3],
diff --git a/src/mesa/pipe/softpipe/sp_quad_occlusion.c b/src/mesa/pipe/softpipe/sp_quad_occlusion.c
index d65fdbdab7..54254df1f1 100644
--- a/src/mesa/pipe/softpipe/sp_quad_occlusion.c
+++ b/src/mesa/pipe/softpipe/sp_quad_occlusion.c
@@ -39,18 +39,22 @@
#include "sp_surface.h"
#include "sp_quad.h"
+static unsigned count_bits( unsigned val )
+{
+ unsigned i;
+
+ for (i = 0; val ; val >>= 1)
+ i += (val & 1);
+
+ return i;
+}
static void
occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct softpipe_context *softpipe = qs->softpipe;
- struct pipe_query_object *occ
- = softpipe->queries[PIPE_QUERY_OCCLUSION_COUNTER];
- occ->count += (quad->mask ) & 1;
- occ->count += (quad->mask >> 1) & 1;
- occ->count += (quad->mask >> 2) & 1;
- occ->count += (quad->mask >> 3) & 1;
+ softpipe->occlusion_count += count_bits(quad->mask);
qs->next->run(qs->next, quad);
}
diff --git a/src/mesa/pipe/softpipe/sp_query.c b/src/mesa/pipe/softpipe/sp_query.c
new file mode 100644
index 0000000000..bf753dad98
--- /dev/null
+++ b/src/mesa/pipe/softpipe/sp_query.c
@@ -0,0 +1,107 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_util.h"
+#include "sp_context.h"
+#include "sp_query.h"
+
+struct softpipe_query {
+ uint64_t start;
+ uint64_t end;
+};
+
+
+static struct softpipe_query *softpipe_query( struct pipe_query *p )
+{
+ return (struct softpipe_query *)p;
+}
+
+static struct pipe_query *
+softpipe_create_query(struct pipe_context *pipe,
+ unsigned type)
+{
+ assert(type == PIPE_QUERY_OCCLUSION_COUNTER);
+ return (struct pipe_query *)CALLOC_STRUCT( softpipe_query );
+}
+
+
+static void
+softpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ FREE(q);
+}
+
+
+static void
+softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct softpipe_context *softpipe = softpipe_context( pipe );
+ struct softpipe_query *sq = softpipe_query(q);
+
+ sq->start = softpipe->occlusion_count;
+}
+
+
+static void
+softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct softpipe_context *softpipe = softpipe_context( pipe );
+ struct softpipe_query *sq = softpipe_query(q);
+
+ sq->end = softpipe->occlusion_count;
+}
+
+
+static boolean
+softpipe_get_query_result(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ uint64_t *result )
+{
+ struct softpipe_query *sq = softpipe_query(q);
+ *result = sq->end - sq->start;
+ return TRUE;
+}
+
+
+void softpipe_init_query_funcs(struct softpipe_context *softpipe )
+{
+ softpipe->pipe.create_query = softpipe_create_query;
+ softpipe->pipe.destroy_query = softpipe_destroy_query;
+ softpipe->pipe.begin_query = softpipe_begin_query;
+ softpipe->pipe.end_query = softpipe_end_query;
+ softpipe->pipe.get_query_result = softpipe_get_query_result;
+}
+
+
diff --git a/src/mesa/pipe/softpipe/sp_query.h b/src/mesa/pipe/softpipe/sp_query.h
new file mode 100644
index 0000000000..05060a4575
--- /dev/null
+++ b/src/mesa/pipe/softpipe/sp_query.h
@@ -0,0 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell
+ */
+
+#ifndef SP_QUERY_H
+#define SP_QUERY_H
+
+struct softpipe_context;
+extern void softpipe_init_query_funcs(struct softpipe_context * );
+
+
+#endif /* SP_QUERY_H */
diff --git a/src/mesa/pipe/softpipe/sp_state.h b/src/mesa/pipe/softpipe/sp_state.h
index ea9d2e62be..a3bd078a71 100644
--- a/src/mesa/pipe/softpipe/sp_state.h
+++ b/src/mesa/pipe/softpipe/sp_state.h
@@ -92,9 +92,6 @@ void softpipe_set_framebuffer_state( struct pipe_context *,
void softpipe_set_blend_color( struct pipe_context *pipe,
const struct pipe_blend_color *blend_color );
-void softpipe_set_clear_color_state( struct pipe_context *,
- const struct pipe_clear_color_state * );
-
void softpipe_set_clip_state( struct pipe_context *,
const struct pipe_clip_state * );
@@ -102,9 +99,6 @@ void softpipe_set_constant_buffer(struct pipe_context *,
uint shader, uint index,
const struct pipe_constant_buffer *buf);
-void softpipe_set_feedback_state( struct pipe_context *,
- const struct pipe_feedback_state * );
-
void *softpipe_create_fs_state(struct pipe_context *,
const struct pipe_shader_state *);
void softpipe_bind_fs_state(struct pipe_context *, void *);
@@ -117,13 +111,10 @@ void softpipe_delete_vs_state(struct pipe_context *, void *);
void softpipe_set_polygon_stipple( struct pipe_context *,
const struct pipe_poly_stipple * );
-void softpipe_set_sampler_units( struct pipe_context *,
- uint numSamplers, const uint *units );
-
void softpipe_set_scissor_state( struct pipe_context *,
const struct pipe_scissor_state * );
-void softpipe_set_texture_state( struct pipe_context *,
+void softpipe_set_sampler_texture( struct pipe_context *,
unsigned unit,
struct pipe_texture * );
@@ -138,11 +129,6 @@ void softpipe_set_vertex_buffer(struct pipe_context *,
unsigned index,
const struct pipe_vertex_buffer *);
-void softpipe_set_feedback_buffer(struct pipe_context *,
- uint index,
- const struct pipe_feedback_buffer *);
-
-
void softpipe_update_derived( struct softpipe_context *softpipe );
diff --git a/src/mesa/pipe/softpipe/sp_state_sampler.c b/src/mesa/pipe/softpipe/sp_state_sampler.c
index 173901f04e..3842e71503 100644
--- a/src/mesa/pipe/softpipe/sp_state_sampler.c
+++ b/src/mesa/pipe/softpipe/sp_state_sampler.c
@@ -67,9 +67,9 @@ softpipe_delete_sampler_state(struct pipe_context *pipe,
void
-softpipe_set_texture_state(struct pipe_context *pipe,
- unsigned unit,
- struct pipe_texture *texture)
+softpipe_set_sampler_texture(struct pipe_context *pipe,
+ unsigned unit,
+ struct pipe_texture *texture)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
@@ -82,15 +82,4 @@ softpipe_set_texture_state(struct pipe_context *pipe,
}
-void
-softpipe_set_sampler_units(struct pipe_context *pipe,
- uint num_samplers, const uint *units )
-{
- struct softpipe_context *softpipe = softpipe_context(pipe);
- uint i;
- for (i = 0; i < num_samplers; i++)
- softpipe->sampler_units[i] = units[i];
- softpipe->dirty |= SP_NEW_SAMPLER;
-}
-
diff --git a/src/mesa/pipe/softpipe/sp_state_surface.c b/src/mesa/pipe/softpipe/sp_state_surface.c
index 30bedc74bc..ee72aaf4c5 100644
--- a/src/mesa/pipe/softpipe/sp_state_surface.c
+++ b/src/mesa/pipe/softpipe/sp_state_surface.c
@@ -131,11 +131,3 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
-void
-softpipe_set_clear_color_state(struct pipe_context *pipe,
- const struct pipe_clear_color_state *clear)
-{
- struct softpipe_context *softpipe = softpipe_context(pipe);
-
- softpipe->clear_color = *clear; /* struct copy */
-}
diff --git a/src/mesa/pipe/softpipe/sp_surface.c b/src/mesa/pipe/softpipe/sp_surface.c
index 55b8b85ef2..1dc494d6ff 100644
--- a/src/mesa/pipe/softpipe/sp_surface.c
+++ b/src/mesa/pipe/softpipe/sp_surface.c
@@ -215,6 +215,59 @@ a1r5g5b5_get_tile(struct pipe_surface *ps,
}
+/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/
+
+static void
+a4r4g4b4_get_tile(struct pipe_surface *ps,
+ unsigned x, unsigned y, unsigned w, unsigned h, float *p)
+{
+ const ushort *src
+ = ((const ushort *) (ps->map))
+ + y * ps->pitch + x;
+ unsigned i, j;
+
+ assert(ps->format == PIPE_FORMAT_A4R4G4B4_UNORM);
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const ushort pixel = src[j];
+ p[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f);
+ p[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f);
+ p[2] = ((pixel ) & 0xf) * (1.0f / 15.0f);
+ p[3] = ((pixel >> 12) ) * (1.0f / 15.0f);
+ p += 4;
+ }
+ src += ps->pitch;
+ }
+}
+
+
+/*** PIPE_FORMAT_R5G6B5_UNORM ***/
+
+static void
+r5g6b5_get_tile(struct pipe_surface *ps,
+ unsigned x, unsigned y, unsigned w, unsigned h, float *p)
+{
+ const ushort *src
+ = ((const ushort *) (ps->map))
+ + y * ps->pitch + x;
+ unsigned i, j;
+
+ assert(ps->format == PIPE_FORMAT_R5G6B5_UNORM);
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const ushort pixel = src[j];
+ p[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f);
+ p[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f);
+ p[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f);
+ p[3] = 1.0f;
+ p += 4;
+ }
+ src += ps->pitch;
+ }
+}
+
/*** PIPE_FORMAT_Z16_UNORM ***/
@@ -576,11 +629,11 @@ softpipe_get_tex_surface(struct pipe_context *pipe,
assert(zslice == 0);
}
- ps = pipe->winsys->surface_alloc(pipe->winsys, pt->format);
+ ps = pipe->winsys->surface_alloc(pipe->winsys);
if (ps) {
- assert(ps->format);
assert(ps->refcount);
pipe->winsys->buffer_reference(pipe->winsys, &ps->buffer, spt->buffer);
+ ps->format = pt->format;
ps->cpp = pt->cpp;
ps->width = pt->width[level];
ps->height = pt->height[level];
@@ -674,6 +727,12 @@ softpipe_get_tile_rgba(struct pipe_context *pipe,
case PIPE_FORMAT_A1R5G5B5_UNORM:
a1r5g5b5_get_tile(ps, x, y, w, h, p);
break;
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ a4r4g4b4_get_tile(ps, x, y, w, h, p);
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ r5g6b5_get_tile(ps, x, y, w, h, p);
+ break;
case PIPE_FORMAT_U_L8:
l8_get_tile(ps, x, y, w, h, p);
break;
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
index ab83f27c1b..8636271a34 100644
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
@@ -1217,8 +1217,7 @@ exec_tex(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
boolean biasLod)
{
- const uint sampler = inst->FullSrcRegisters[1].SrcRegister.Index;
- const uint unit = mach->SamplerUnits[sampler];
+ const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
union tgsi_exec_channel r[8];
uint chan_index;
float lodBias;
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.h b/src/mesa/pipe/tgsi/exec/tgsi_exec.h
index 2c62b30f15..e7952a08e3 100644
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.h
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.h
@@ -162,7 +162,6 @@ struct tgsi_exec_machine
struct tgsi_exec_vector *Temps;
struct tgsi_exec_vector *Addrs;
- uint *SamplerUnits;
struct tgsi_sampler *Samplers;
float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
index e403cfaaf3..8ee43f59b5 100755
--- a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
@@ -2245,6 +2245,11 @@ tgsi_emit_sse2(
&parse.FullToken.FullInstruction );
break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* XXX implement this */
+ assert(0);
+ break;
+
default:
assert( 0 );
}
@@ -2321,6 +2326,11 @@ tgsi_emit_sse2_fs(
&parse.FullToken.FullInstruction );
break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* XXX implement this */
+ assert(0);
+ break;
+
default:
assert( 0 );
}
diff --git a/src/mesa/pipe/xlib/brw_aub.c b/src/mesa/pipe/xlib/brw_aub.c
new file mode 100644
index 0000000000..1b7fc3c20e
--- /dev/null
+++ b/src/mesa/pipe/xlib/brw_aub.c
@@ -0,0 +1,392 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_aub.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "imports.h"
+//#include "intel_winsys.h"
+
+
+struct brw_aubfile {
+ FILE *file;
+ unsigned next_free_page;
+};
+
+
+extern char *__progname;
+
+
+struct aub_file_header {
+ unsigned int instruction_type;
+ unsigned int pad0:16;
+ unsigned int minor:8;
+ unsigned int major:8;
+ unsigned char application[8*4];
+ unsigned int day:8;
+ unsigned int month:8;
+ unsigned int year:16;
+ unsigned int timezone:8;
+ unsigned int second:8;
+ unsigned int minute:8;
+ unsigned int hour:8;
+ unsigned int comment_length:16;
+ unsigned int pad1:16;
+};
+
+struct aub_block_header {
+ unsigned int instruction_type;
+ unsigned int operation:8;
+ unsigned int type:8;
+ unsigned int address_space:8;
+ unsigned int pad0:8;
+ unsigned int general_state_type:8;
+ unsigned int surface_state_type:8;
+ unsigned int pad1:16;
+ unsigned int address;
+ unsigned int length;
+};
+
+struct aub_dump_bmp {
+ unsigned int instruction_type;
+ unsigned int xmin:16;
+ unsigned int ymin:16;
+ unsigned int pitch:16;
+ unsigned int bpp:8;
+ unsigned int format:8;
+ unsigned int xsize:16;
+ unsigned int ysize:16;
+ unsigned int addr;
+ unsigned int unknown;
+};
+
+enum bh_operation {
+ BH_COMMENT,
+ BH_DATA_WRITE,
+ BH_COMMAND_WRITE,
+ BH_MMI0_WRITE32,
+ BH_END_SCENE,
+ BH_CONFIG_MEMORY_MAP,
+ BH_MAX_OPERATION
+};
+
+enum command_write_type {
+ CW_HWB_RING = 1,
+ CW_PRIMARY_RING_A,
+ CW_PRIMARY_RING_B, /* XXX - disagreement with listaub! */
+ CW_PRIMARY_RING_C,
+ CW_MAX_TYPE
+};
+
+enum memory_map_type {
+ MM_DEFAULT,
+ MM_DYNAMIC,
+ MM_MAX_TYPE
+};
+
+enum address_space {
+ ADDR_GTT,
+ ADDR_LOCAL,
+ ADDR_MAIN,
+ ADDR_MAX
+};
+
+
+#define AUB_FILE_HEADER 0xe085000b
+#define AUB_BLOCK_HEADER 0xe0c10003
+#define AUB_DUMP_BMP 0xe09e0004
+
+/* Registers to control page table
+ */
+#define PGETBL_CTL 0x2020
+#define PGETBL_ENABLED 0x1
+
+#define NR_GTT_ENTRIES 65536 /* 256 mb */
+
+#define FAIL \
+do { \
+ fprintf(stderr, "failed to write aub data at %s/%d\n", __FUNCTION__, __LINE__); \
+ exit(1); \
+} while (0)
+
+
+/* Emit the headers at the top of each aubfile. Initialize the GTT.
+ */
+static void init_aubfile( FILE *aub_file )
+{
+ struct aub_file_header fh;
+ struct aub_block_header bh;
+ unsigned int data;
+
+ static int nr;
+
+ nr++;
+
+ /* Emit the aub header:
+ */
+ memset(&fh, 0, sizeof(fh));
+
+ fh.instruction_type = AUB_FILE_HEADER;
+ fh.minor = 0x0;
+ fh.major = 0x7;
+ memcpy(fh.application, __progname, sizeof(fh.application));
+ fh.day = (nr>>24) & 0xff;
+ fh.month = 0x0;
+ fh.year = 0x0;
+ fh.timezone = 0x0;
+ fh.second = nr & 0xff;
+ fh.minute = (nr>>8) & 0xff;
+ fh.hour = (nr>>16) & 0xff;
+ fh.comment_length = 0x0;
+
+ if (fwrite(&fh, sizeof(fh), 1, aub_file) < 0)
+ FAIL;
+
+ /* Setup the GTT starting at main memory address zero (!):
+ */
+ memset(&bh, 0, sizeof(bh));
+
+ bh.instruction_type = AUB_BLOCK_HEADER;
+ bh.operation = BH_MMI0_WRITE32;
+ bh.type = 0x0;
+ bh.address_space = ADDR_GTT; /* ??? */
+ bh.general_state_type = 0x0;
+ bh.surface_state_type = 0x0;
+ bh.address = PGETBL_CTL;
+ bh.length = 0x4;
+
+ if (fwrite(&bh, sizeof(bh), 1, aub_file) < 0)
+ FAIL;
+
+ data = 0x0 | PGETBL_ENABLED;
+
+ if (fwrite(&data, sizeof(data), 1, aub_file) < 0)
+ FAIL;
+}
+
+
+static void init_aub_gtt( struct brw_aubfile *aubfile,
+ unsigned start_offset,
+ unsigned size )
+{
+ FILE *aub_file = aubfile->file;
+ struct aub_block_header bh;
+ unsigned int i;
+
+ assert(start_offset + size < NR_GTT_ENTRIES * 4096);
+
+
+ memset(&bh, 0, sizeof(bh));
+
+ bh.instruction_type = AUB_BLOCK_HEADER;
+ bh.operation = BH_DATA_WRITE;
+ bh.type = 0x0;
+ bh.address_space = ADDR_MAIN;
+ bh.general_state_type = 0x0;
+ bh.surface_state_type = 0x0;
+ bh.address = start_offset / 4096 * 4;
+ bh.length = size / 4096 * 4;
+
+ if (fwrite(&bh, sizeof(bh), 1, aub_file) < 0)
+ FAIL;
+
+ for (i = 0; i < size / 4096; i++) {
+ unsigned data = aubfile->next_free_page | 1;
+
+ aubfile->next_free_page += 4096;
+
+ if (fwrite(&data, sizeof(data), 1, aub_file) < 0)
+ FAIL;
+ }
+
+}
+
+static void write_block_header( FILE *aub_file,
+ struct aub_block_header *bh,
+ const unsigned *data,
+ unsigned sz )
+{
+ sz = (sz + 3) & ~3;
+
+ if (fwrite(bh, sizeof(*bh), 1, aub_file) < 0)
+ FAIL;
+
+ if (fwrite(data, sz, 1, aub_file) < 0)
+ FAIL;
+
+ fflush(aub_file);
+}
+
+
+static void write_dump_bmp( FILE *aub_file,
+ struct aub_dump_bmp *db )
+{
+ if (fwrite(db, sizeof(*db), 1, aub_file) < 0)
+ FAIL;
+
+ fflush(aub_file);
+}
+
+
+
+void brw_aub_gtt_data( struct brw_aubfile *aubfile,
+ unsigned offset,
+ const void *data,
+ unsigned sz,
+ unsigned type,
+ unsigned state_type )
+{
+ struct aub_block_header bh;
+
+ bh.instruction_type = AUB_BLOCK_HEADER;
+ bh.operation = BH_DATA_WRITE;
+ bh.type = type;
+ bh.address_space = ADDR_GTT;
+ bh.pad0 = 0;
+
+ if (type == DW_GENERAL_STATE) {
+ bh.general_state_type = state_type;
+ bh.surface_state_type = 0;
+ }
+ else {
+ bh.general_state_type = 0;
+ bh.surface_state_type = state_type;
+ }
+
+ bh.pad1 = 0;
+ bh.address = offset;
+ bh.length = sz;
+
+ write_block_header(aubfile->file, &bh, data, sz);
+}
+
+
+
+void brw_aub_gtt_cmds( struct brw_aubfile *aubfile,
+ unsigned offset,
+ const void *data,
+ unsigned sz )
+{
+ struct aub_block_header bh;
+ unsigned type = CW_PRIMARY_RING_A;
+
+
+ bh.instruction_type = AUB_BLOCK_HEADER;
+ bh.operation = BH_COMMAND_WRITE;
+ bh.type = type;
+ bh.address_space = ADDR_GTT;
+ bh.pad0 = 0;
+ bh.general_state_type = 0;
+ bh.surface_state_type = 0;
+ bh.pad1 = 0;
+ bh.address = offset;
+ bh.length = sz;
+
+ write_block_header(aubfile->file, &bh, data, sz);
+}
+
+void brw_aub_dump_bmp( struct brw_aubfile *aubfile,
+ struct pipe_surface *surface,
+ unsigned gtt_offset )
+{
+ struct aub_dump_bmp db;
+ unsigned format;
+
+ if (surface->cpp == 4)
+ format = 0x7;
+ else
+ format = 0x3;
+
+ db.instruction_type = AUB_DUMP_BMP;
+ db.xmin = 0;
+ db.ymin = 0;
+ db.format = format;
+ db.bpp = surface->cpp * 8;
+ db.pitch = surface->pitch;
+ db.xsize = surface->pitch;
+ db.ysize = surface->height;
+ db.addr = gtt_offset;
+ db.unknown = /* surface->tiled ? 0x4 : */ 0x0;
+
+ write_dump_bmp(aubfile->file, &db);
+}
+
+
+
+struct brw_aubfile *brw_aubfile_create( void )
+{
+ struct brw_aubfile *aubfile = CALLOC_STRUCT(brw_aubfile);
+ char filename[80];
+ int val;
+ static int i = 0;
+
+ i++;
+
+ if (_mesa_getenv("INTEL_AUBFILE")) {
+ val = snprintf(filename, sizeof(filename), "%s%d.aub", _mesa_getenv("INTEL_AUBFILE"), i%4);
+ _mesa_printf("--> Aub file: %s\n", filename);
+ aubfile->file = fopen(filename, "w");
+ }
+ else {
+ val = snprintf(filename, sizeof(filename), "%s.aub", __progname);
+ if (val < 0 || val > sizeof(filename))
+ strcpy(filename, "default.aub");
+
+ _mesa_printf("--> Aub file: %s\n", filename);
+ aubfile->file = fopen(filename, "w");
+ }
+
+ if (!aubfile->file) {
+ _mesa_printf("couldn't open aubfile\n");
+ exit(1);
+ }
+
+ init_aubfile(aubfile->file);
+
+ /* The GTT is located starting address zero in main memory. Pages
+ * to populate the gtt start after this point.
+ */
+ aubfile->next_free_page = (NR_GTT_ENTRIES * 4 + 4095) & ~4095;
+
+ /* More or less correspond with all the agp regions mapped by the
+ * driver:
+ */
+ init_aub_gtt(aubfile, 0, 4096*4);
+ init_aub_gtt(aubfile, AUB_BUF_START, AUB_BUF_SIZE);
+
+ return aubfile;
+}
+
+void brw_aub_destroy( struct brw_aubfile *aubfile )
+{
+ fclose(aubfile->file);
+ FREE(aubfile);
+}
diff --git a/src/mesa/pipe/xlib/brw_aub.h b/src/mesa/pipe/xlib/brw_aub.h
new file mode 100644
index 0000000000..f5c60c7be2
--- /dev/null
+++ b/src/mesa/pipe/xlib/brw_aub.h
@@ -0,0 +1,114 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef BRW_AUB_H
+#define BRW_AUB_H
+
+/* We set up this region, buffers may be allocated here:
+ */
+#define AUB_BUF_START (4096*4)
+#define AUB_BUF_SIZE (8*1024*1024)
+
+struct intel_context;
+struct pipe_surface;
+
+struct brw_aubfile *brw_aubfile_create( void );
+
+void brw_aub_destroy( struct brw_aubfile *aubfile );
+
+void brw_aub_gtt_data( struct brw_aubfile *aubfile,
+ unsigned offset,
+ const void *data,
+ unsigned sz,
+ unsigned type,
+ unsigned state_type );
+
+void brw_aub_gtt_cmds( struct brw_aubfile *aubfile,
+ unsigned offset,
+ const void *data,
+ unsigned sz );
+
+void brw_aub_dump_bmp( struct brw_aubfile *aubfile,
+ struct pipe_surface *surface,
+ unsigned gtt_offset );
+
+
+enum data_write_type {
+ DW_NOTYPE,
+ DW_BATCH_BUFFER,
+ DW_BIN_BUFFER,
+ DW_BIN_POINTER_LIST,
+ DW_SLOW_STATE_BUFFER,
+ DW_VERTEX_BUFFER,
+ DW_2D_MAP,
+ DW_CUBE_MAP,
+ DW_INDIRECT_STATE_BUFFER,
+ DW_VOLUME_MAP,
+ DW_1D_MAP,
+ DW_CONSTANT_BUFFER,
+ DW_CONSTANT_URB_ENTRY,
+ DW_INDEX_BUFFER,
+ DW_GENERAL_STATE,
+ DW_SURFACE_STATE,
+ DW_MEDIA_OBJECT_INDIRECT_DATA,
+ DW_MAX_TYPE
+};
+
+enum data_write_general_state_type {
+ DWGS_NOTYPE,
+ DWGS_VERTEX_SHADER_STATE,
+ DWGS_GEOMETRY_SHADER_STATE ,
+ DWGS_CLIPPER_STATE,
+ DWGS_STRIPS_FANS_STATE,
+ DWGS_WINDOWER_IZ_STATE,
+ DWGS_COLOR_CALC_STATE,
+ DWGS_CLIPPER_VIEWPORT_STATE, /* was 0x7 */
+ DWGS_STRIPS_FANS_VIEWPORT_STATE,
+ DWGS_COLOR_CALC_VIEWPORT_STATE, /* was 0x9 */
+ DWGS_SAMPLER_STATE,
+ DWGS_KERNEL_INSTRUCTIONS,
+ DWGS_SCRATCH_SPACE,
+ DWGS_SAMPLER_DEFAULT_COLOR,
+ DWGS_INTERFACE_DESCRIPTOR,
+ DWGS_VLD_STATE,
+ DWGS_VFE_STATE,
+ DWGS_MAX_TYPE
+};
+
+enum data_write_surface_state_type {
+ DWSS_NOTYPE,
+ DWSS_BINDING_TABLE_STATE,
+ DWSS_SURFACE_STATE,
+ DWSS_MAX_TYPE
+};
+
+
+#endif
diff --git a/src/mesa/pipe/xlib/xm_api.c b/src/mesa/pipe/xlib/xm_api.c
index 9c7713790b..83e2b529a4 100644
--- a/src/mesa/pipe/xlib/xm_api.c
+++ b/src/mesa/pipe/xlib/xm_api.c
@@ -75,12 +75,16 @@
#include "pipe/p_defines.h"
#include "pipe/p_context.h"
+#include "xm_winsys_aub.h"
+
/**
* Global X driver lock
*/
_glthread_Mutex _xmesa_lock;
+int xmesa_mode;
+
/**********************************************************************/
/***** X Utility Functions *****/
@@ -738,7 +742,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
pf = choose_pixel_format(v);
assert(pf);
- pipe = xmesa_create_pipe_context( c, pf );
+ if (!getenv("XM_AUB")) {
+ xmesa_mode = XMESA_SOFTPIPE;
+ pipe = xmesa_create_pipe_context( c, pf );
+ }
+ else {
+ xmesa_mode = XMESA_AUB;
+ pipe = xmesa_create_i965simple( xmesa_get_pipe_winsys_aub() );
+ }
c->st = st_create_context(pipe, &v->mesa_visual,
share_list ? share_list->st : NULL);
@@ -1164,7 +1175,10 @@ void XMesaSwapBuffers( XMesaBuffer b )
surf = st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT);
if (surf) {
- xmesa_display_surface(b, surf);
+ if (xmesa_mode == XMESA_AUB)
+ xmesa_display_aub( surf );
+ else
+ xmesa_display_surface(b, surf);
}
xmesa_check_and_update_buffer_size(NULL, b);
diff --git a/src/mesa/pipe/xlib/xm_winsys.c b/src/mesa/pipe/xlib/xm_winsys.c
index c372545cf8..42c43387af 100644
--- a/src/mesa/pipe/xlib/xm_winsys.c
+++ b/src/mesa/pipe/xlib/xm_winsys.c
@@ -35,58 +35,18 @@
#include "glxheader.h"
#include "xmesaP.h"
-#include "main/macros.h"
#include "pipe/p_winsys.h"
#include "pipe/p_format.h"
#include "pipe/p_context.h"
+#include "pipe/p_util.h"
#include "pipe/softpipe/sp_winsys.h"
#ifdef GALLIUM_CELL
#include "pipe/cell/ppu/cell_context.h"
#include "pipe/cell/ppu/cell_winsys.h"
#endif
-
-
-/** XXX from Mesa core */
-static void *
-align_malloc(size_t bytes, unsigned long alignment)
-{
-#if defined(HAVE_POSIX_MEMALIGN)
- void *mem;
-
- (void) posix_memalign(& mem, alignment, bytes);
- return mem;
-#else
- uintptr_t ptr, buf;
-
- assert( alignment > 0 );
-
- ptr = (uintptr_t) malloc(bytes + alignment + sizeof(void *));
- if (!ptr)
- return NULL;
-
- buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
- *(uintptr_t *)(buf - sizeof(void *)) = ptr;
-
- return (void *) buf;
-#endif /* defined(HAVE_POSIX_MEMALIGN) */
-}
-
-
-/** XXX from Mesa core */
-static void
-align_free(void *ptr)
-{
-#if defined(HAVE_POSIX_MEMALIGN)
- free(ptr);
-#else
- void **cubbyHole = (void **) ((char *) ptr - sizeof(void *));
- void *realAddr = *cubbyHole;
- free(realAddr);
-#endif /* defined(HAVE_POSIX_MEMALIGN) */
-}
-
+#include "xm_winsys_aub.h"
/**
@@ -211,6 +171,7 @@ xm_buffer_data(struct pipe_winsys *pws, struct pipe_buffer_handle *buf,
if (xm_buf->size != size) {
if (xm_buf->data)
align_free(xm_buf->data);
+ /* align to 16-byte multiple for Cell */
xm_buf->data = align_malloc(size, 16);
xm_buf->size = size;
}
@@ -254,6 +215,7 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
XImage *ximage = b->tempImage;
struct xm_buffer *xm_buf = xm_bo(surf->buffer);
const int TILE_SIZE = 32;
+ const uint tilesPerRow = (surf->width + TILE_SIZE - 1) / TILE_SIZE;
uint x, y;
/* check that the XImage has been previously initialized */
@@ -271,7 +233,7 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
int dy = y;
int tx = x / TILE_SIZE;
int ty = y / TILE_SIZE;
- int offset = ty * (surf->width / TILE_SIZE) + tx;
+ int offset = ty * tilesPerRow + tx;
offset *= 4 * TILE_SIZE * TILE_SIZE;
ximage->data = (char *) xm_buf->data + offset;
@@ -386,11 +348,38 @@ round_up(unsigned n, unsigned multiple)
return (n + multiple - 1) & ~(multiple - 1);
}
-static unsigned
-xm_surface_pitch(struct pipe_winsys *winsys, unsigned cpp, unsigned width,
- unsigned flags)
+static int
+xm_surface_alloc_storage(struct pipe_winsys *winsys,
+ struct pipe_surface *surf,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned flags)
{
- return round_up(width, 64 / cpp);
+ const unsigned alignment = 64;
+ int ret;
+
+ surf->width = width;
+ surf->height = height;
+ surf->format = format;
+ surf->cpp = pf_get_size(format);
+ surf->pitch = round_up(width, alignment / surf->cpp);
+
+ assert(!surf->buffer);
+ surf->buffer = winsys->buffer_create(winsys, alignment, 0, 0);
+ if(!surf->buffer)
+ return -1;
+
+ ret = winsys->buffer_data(winsys,
+ surf->buffer,
+ surf->pitch * surf->cpp * height,
+ NULL,
+ 0);
+ if(ret) {
+ winsys->buffer_reference(winsys, &surf->buffer, NULL);
+ return ret;
+ }
+
+ return 0;
}
@@ -399,14 +388,12 @@ xm_surface_pitch(struct pipe_winsys *winsys, unsigned cpp, unsigned width,
* renderbuffers, etc.
*/
static struct pipe_surface *
-xm_surface_alloc(struct pipe_winsys *ws, enum pipe_format pipeFormat)
+xm_surface_alloc(struct pipe_winsys *ws)
{
struct xmesa_surface *xms = CALLOC_STRUCT(xmesa_surface);
assert(ws);
- assert(pipeFormat);
- xms->surface.format = pipeFormat;
xms->surface.refcount = 1;
xms->surface.winsys = ws;
@@ -441,12 +428,15 @@ xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
* For Xlib, this is a singleton object.
* Nothing special for the Xlib driver so no subclassing or anything.
*/
-static struct pipe_winsys *
-xmesa_get_pipe_winsys(void)
+struct pipe_winsys *
+xmesa_get_pipe_winsys_aub(void)
{
static struct pipe_winsys *ws = NULL;
- if (!ws) {
+ if (!ws && getenv("XM_AUB")) {
+ ws = xmesa_create_pipe_winsys_aub();
+ }
+ else if (!ws) {
ws = CALLOC_STRUCT(pipe_winsys);
/* Fill in this struct with callbacks that pipe will need to
@@ -461,8 +451,8 @@ xmesa_get_pipe_winsys(void)
ws->buffer_subdata = xm_buffer_subdata;
ws->buffer_get_subdata = xm_buffer_get_subdata;
- ws->surface_pitch = xm_surface_pitch;
ws->surface_alloc = xm_surface_alloc;
+ ws->surface_alloc_storage = xm_surface_alloc_storage;
ws->surface_release = xm_surface_release;
ws->flush_frontbuffer = xm_flush_frontbuffer;
@@ -510,7 +500,7 @@ xmesa_get_softpipe_winsys(uint pixelformat)
struct pipe_context *
xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat)
{
- struct pipe_winsys *pws = xmesa_get_pipe_winsys();
+ struct pipe_winsys *pws = xmesa_get_pipe_winsys_aub();
struct pipe_context *pipe;
#ifdef GALLIUM_CELL
diff --git a/src/mesa/pipe/xlib/xm_winsys_aub.c b/src/mesa/pipe/xlib/xm_winsys_aub.c
new file mode 100644
index 0000000000..ee3c2d6181
--- /dev/null
+++ b/src/mesa/pipe/xlib/xm_winsys_aub.c
@@ -0,0 +1,618 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell
+ * Brian Paul
+ */
+
+
+#include "glxheader.h"
+#include "xmesaP.h"
+#include "main/macros.h"
+
+#include "pipe/p_winsys.h"
+#include "pipe/i965simple/brw_winsys.h"
+#include "brw_aub.h"
+#include "xm_winsys_aub.h"
+
+
+
+struct aub_buffer {
+ char *data;
+ unsigned offset;
+ unsigned size;
+ unsigned refcount;
+ unsigned map_count;
+ boolean dump_on_unmap;
+ boolean userbuffer;
+};
+
+
+
+struct aub_pipe_winsys {
+ struct pipe_winsys winsys;
+
+ struct brw_aubfile *aubfile;
+
+ /* This is simple, isn't it:
+ */
+ char *pool;
+ unsigned size;
+ unsigned used;
+};
+
+
+/* Turn a pipe winsys into an aub/pipe winsys:
+ */
+static inline struct aub_pipe_winsys *
+aub_pipe_winsys( struct pipe_winsys *winsys )
+{
+ return (struct aub_pipe_winsys *)winsys;
+}
+
+
+
+static INLINE struct aub_buffer *
+aub_bo( struct pipe_buffer_handle *bo )
+{
+ return (struct aub_buffer *)bo;
+}
+
+static INLINE struct pipe_buffer_handle *
+pipe_bo( struct aub_buffer *bo )
+{
+ return (struct pipe_buffer_handle *)bo;
+}
+
+
+
+
+static void *aub_buffer_map(struct pipe_winsys *winsys,
+ struct pipe_buffer_handle *buf,
+ unsigned flags )
+{
+ struct aub_buffer *sbo = aub_bo(buf);
+
+ if (flags & PIPE_BUFFER_FLAG_WRITE)
+ sbo->dump_on_unmap = 1;
+
+ sbo->map_count++;
+ return sbo->data;
+}
+
+static void aub_buffer_unmap(struct pipe_winsys *winsys,
+ struct pipe_buffer_handle *buf)
+{
+ struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
+ struct aub_buffer *sbo = aub_bo(buf);
+
+ sbo->map_count--;
+
+ if (sbo->map_count == 0 &&
+ sbo->dump_on_unmap) {
+ brw_aub_gtt_data( iws->aubfile,
+ sbo->offset,
+ sbo->data,
+ sbo->size,
+ 0,
+ 0);
+ }
+}
+
+
+static void
+aub_buffer_reference(struct pipe_winsys *winsys,
+ struct pipe_buffer_handle **ptr,
+ struct pipe_buffer_handle *buf)
+{
+ if (*ptr) {
+ if (--(aub_bo(*ptr)->refcount) == 0)
+ free(*ptr);
+ *ptr = NULL;
+ }
+
+ if (buf) {
+ aub_bo(buf)->refcount++;
+ *ptr = buf;
+ }
+}
+
+
+static int aub_buffer_data(struct pipe_winsys *winsys,
+ struct pipe_buffer_handle *buf,
+ unsigned size, const void *data,
+ unsigned usage )
+{
+ struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
+ struct aub_buffer *sbo = aub_bo(buf);
+
+ /* Could reuse buffers that are not referenced in current
+ * batchbuffer. Can't do that atm, so always reallocate:
+ */
+ if (1 || sbo->size < size) {
+ assert(iws->used + size < iws->size);
+ sbo->data = iws->pool + iws->used;
+ sbo->offset = AUB_BUF_START + iws->used;
+ iws->used += size;
+ }
+
+ sbo->size = size;
+
+ if (data != NULL) {
+ memcpy(iws->pool, data, size);
+
+ brw_aub_gtt_data( iws->aubfile,
+ sbo->offset,
+ sbo->data,
+ sbo->size,
+ 0,
+ 0 );
+ }
+ return 0;
+}
+
+static int aub_buffer_subdata(struct pipe_winsys *winsys,
+ struct pipe_buffer_handle *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data)
+{
+ struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
+ struct aub_buffer *sbo = aub_bo(buf);
+
+ assert(sbo->size > offset + size);
+ memcpy(sbo->data + offset, data, size);
+
+ brw_aub_gtt_data( iws->aubfile,
+ sbo->offset + offset,
+ sbo->data + offset,
+ size,
+ 0,
+ 0 );
+ return 0;
+}
+
+
+void xmesa_buffer_subdata_aub(struct pipe_winsys *winsys,
+ struct pipe_buffer_handle *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data,
+ unsigned aub_type,
+ unsigned aub_sub_type)
+{
+ struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
+ struct aub_buffer *sbo = aub_bo(buf);
+
+ assert(sbo->size > offset + size);
+ memcpy(sbo->data + offset, data, size);
+
+ brw_aub_gtt_data( iws->aubfile,
+ sbo->offset + offset,
+ sbo->data + offset,
+ size,
+ aub_type,
+ aub_sub_type );
+}
+
+void xmesa_commands_aub(struct pipe_winsys *winsys,
+ unsigned *cmds,
+ unsigned nr_dwords)
+{
+ struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
+ brw_aub_gtt_cmds( iws->aubfile,
+ 0, /* ?? */
+ cmds,
+ nr_dwords * sizeof(int) );
+}
+
+
+static struct aub_pipe_winsys *global_winsys = NULL;
+
+void xmesa_display_aub( /* struct pipe_winsys *winsys, */
+ struct pipe_surface *surface )
+{
+// struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
+ brw_aub_dump_bmp( global_winsys->aubfile,
+ surface,
+ aub_bo(surface->buffer)->offset );
+}
+
+
+
+static int aub_buffer_get_subdata(struct pipe_winsys *winsys,
+ struct pipe_buffer_handle *buf,
+ unsigned long offset,
+ unsigned long size,
+ void *data)
+{
+ struct aub_buffer *sbo = aub_bo(buf);
+ assert(sbo->size > offset + size);
+ memcpy(data, sbo->data + offset, size);
+ return 0;
+}
+
+/* Pipe has no concept of pools. We choose the tex/region pool
+ * for all buffers.
+ */
+static struct pipe_buffer_handle *
+aub_buffer_create(struct pipe_winsys *winsys,
+ unsigned alignment,
+ unsigned flags,
+ unsigned hint)
+{
+ return pipe_bo(CALLOC_STRUCT(aub_buffer));
+}
+
+
+static struct pipe_buffer_handle *
+aub_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes)
+{
+ struct aub_buffer *sbo = CALLOC_STRUCT(aub_buffer);
+ sbo->size = bytes;
+ sbo->userbuffer = 1;
+ sbo->data = ptr;
+ return pipe_bo(sbo);
+}
+
+
+/* The state tracker (should!) keep track of whether the fake
+ * frontbuffer has been touched by any rendering since the last time
+ * we copied its contents to the real frontbuffer. Our task is easy:
+ */
+static void
+aub_flush_frontbuffer( struct pipe_winsys *winsys,
+ struct pipe_surface *surf,
+ void *context_private)
+{
+ xmesa_display_aub( surf );
+}
+
+static struct pipe_surface *
+aub_i915_surface_alloc(struct pipe_winsys *winsys)
+{
+ struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface);
+ if (surf) {
+ surf->refcount = 1;
+ surf->winsys = winsys;
+ }
+ return surf;
+}
+
+
+/**
+ * Round n up to next multiple.
+ */
+static INLINE unsigned
+round_up(unsigned n, unsigned multiple)
+{
+ return (n + multiple - 1) & ~(multiple - 1);
+}
+
+static int
+aub_i915_surface_alloc_storage(struct pipe_winsys *winsys,
+ struct pipe_surface *surf,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned flags)
+{
+ const unsigned alignment = 64;
+ int ret;
+
+ surf->width = width;
+ surf->height = height;
+ surf->format = format;
+ surf->cpp = pf_get_size(format);
+ surf->pitch = round_up(width, alignment / surf->cpp);
+
+ assert(!surf->buffer);
+ surf->buffer = winsys->buffer_create(winsys, alignment, 0, 0);
+ if(!surf->buffer)
+ return -1;
+
+ ret = winsys->buffer_data(winsys,
+ surf->buffer,
+ surf->pitch * surf->cpp * height,
+ NULL,
+ 0);
+ if(ret) {
+ winsys->buffer_reference(winsys, &surf->buffer, NULL);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void
+aub_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
+{
+ struct pipe_surface *surf = *s;
+ surf->refcount--;
+ if (surf->refcount == 0) {
+ if (surf->buffer)
+ winsys->buffer_reference(winsys, &surf->buffer, NULL);
+ free(surf);
+ }
+ *s = NULL;
+}
+
+
+
+static void
+aub_printf( struct pipe_winsys *winsys, const char *fmtString, ... )
+{
+ va_list args;
+ va_start( args, fmtString );
+ vfprintf(stderr, fmtString, args);
+ va_end( args );
+}
+
+static const char *
+aub_get_name( struct pipe_winsys *winsys )
+{
+ return "Aub/xlib";
+}
+
+struct pipe_winsys *
+xmesa_create_pipe_winsys_aub( void )
+{
+ struct aub_pipe_winsys *iws = CALLOC_STRUCT( aub_pipe_winsys );
+
+ /* Fill in this struct with callbacks that pipe will need to
+ * communicate with the window system, buffer manager, etc.
+ *
+ * Pipe would be happy with a malloc based memory manager, but
+ * the SwapBuffers implementation in this winsys driver requires
+ * that rendering be done to an appropriate _DriBufferObject.
+ */
+ iws->winsys.buffer_create = aub_buffer_create;
+ iws->winsys.user_buffer_create = aub_user_buffer_create;
+ iws->winsys.buffer_map = aub_buffer_map;
+ iws->winsys.buffer_unmap = aub_buffer_unmap;
+ iws->winsys.buffer_reference = aub_buffer_reference;
+ iws->winsys.buffer_data = aub_buffer_data;
+ iws->winsys.buffer_subdata = aub_buffer_subdata;
+ iws->winsys.buffer_get_subdata = aub_buffer_get_subdata;
+ iws->winsys.flush_frontbuffer = aub_flush_frontbuffer;
+ iws->winsys.printf = aub_printf;
+ iws->winsys.get_name = aub_get_name;
+
+ iws->winsys.surface_alloc = aub_i915_surface_alloc;
+ iws->winsys.surface_alloc_storage = aub_i915_surface_alloc_storage;
+ iws->winsys.surface_release = aub_i915_surface_release;
+
+ iws->aubfile = brw_aubfile_create();
+ iws->size = AUB_BUF_SIZE;
+ iws->pool = malloc(AUB_BUF_SIZE);
+
+ /* HACK: static copy of this pointer:
+ */
+ assert(global_winsys == NULL);
+ global_winsys = iws;
+
+ return &iws->winsys;
+}
+
+
+void
+xmesa_destroy_pipe_winsys_aub( struct pipe_winsys *winsys )
+
+{
+ struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
+ brw_aub_destroy(iws->aubfile);
+ free(iws->pool);
+ free(iws);
+}
+
+
+
+
+
+
+
+#define IWS_BATCHBUFFER_SIZE 1024
+
+struct aub_brw_winsys {
+ struct brw_winsys winsys; /**< batch buffer funcs */
+ struct aub_context *aub;
+
+ struct pipe_winsys *pipe_winsys;
+
+ unsigned data[IWS_BATCHBUFFER_SIZE];
+ unsigned nr;
+ unsigned size;
+};
+
+
+/* Turn a i965simple winsys into an aub/i965simple winsys:
+ */
+static inline struct aub_brw_winsys *
+aub_brw_winsys( struct brw_winsys *sws )
+{
+ return (struct aub_brw_winsys *)sws;
+}
+
+
+/* Simple batchbuffer interface:
+ */
+
+static unsigned *aub_i965_batch_start( struct brw_winsys *sws,
+ unsigned dwords,
+ unsigned relocs )
+{
+ struct aub_brw_winsys *iws = aub_brw_winsys(sws);
+
+ if (iws->size < iws->nr + dwords)
+ return NULL;
+
+ return (void *)1; /* not a valid pointer! */
+}
+
+static void aub_i965_batch_dword( struct brw_winsys *sws,
+ unsigned dword )
+{
+ struct aub_brw_winsys *iws = aub_brw_winsys(sws);
+
+ iws->data[iws->nr++] = dword;
+}
+
+static void aub_i965_batch_reloc( struct brw_winsys *sws,
+ struct pipe_buffer_handle *buf,
+ unsigned access_flags,
+ unsigned delta )
+{
+ struct aub_brw_winsys *iws = aub_brw_winsys(sws);
+
+ iws->data[iws->nr++] = aub_bo(buf)->offset + delta;
+}
+
+
+
+static void aub_i965_batch_flush( struct brw_winsys *sws,
+ struct pipe_fence_handle **fence )
+{
+ struct aub_brw_winsys *iws = aub_brw_winsys(sws);
+ assert(iws->nr <= iws->size);
+
+ if (iws->nr)
+ xmesa_commands_aub( iws->pipe_winsys,
+ iws->data,
+ iws->nr );
+ iws->nr = 0;
+}
+
+
+
+static void aub_i965_buffer_subdata_typed(struct brw_winsys *winsys,
+ struct pipe_buffer_handle *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data,
+ unsigned data_type)
+{
+ struct aub_brw_winsys *iws = aub_brw_winsys(winsys);
+ unsigned aub_type = DW_GENERAL_STATE;
+ unsigned aub_sub_type;
+
+ switch (data_type) {
+ case BRW_CC_VP:
+ aub_sub_type = DWGS_COLOR_CALC_VIEWPORT_STATE;
+ break;
+ case BRW_CC_UNIT:
+ aub_sub_type = DWGS_COLOR_CALC_STATE;
+ break;
+ case BRW_WM_PROG:
+ aub_sub_type = DWGS_KERNEL_INSTRUCTIONS;
+ break;
+ case BRW_SAMPLER_DEFAULT_COLOR:
+ aub_sub_type = DWGS_SAMPLER_DEFAULT_COLOR;
+ break;
+ case BRW_SAMPLER:
+ aub_sub_type = DWGS_SAMPLER_STATE;
+ break;
+ case BRW_WM_UNIT:
+ aub_sub_type = DWGS_WINDOWER_IZ_STATE;
+ break;
+ case BRW_SF_PROG:
+ aub_sub_type = DWGS_KERNEL_INSTRUCTIONS;
+ break;
+ case BRW_SF_VP:
+ aub_sub_type = DWGS_STRIPS_FANS_VIEWPORT_STATE;
+ break;
+ case BRW_SF_UNIT:
+ aub_sub_type = DWGS_STRIPS_FANS_STATE;
+ break;
+ case BRW_VS_UNIT:
+ aub_sub_type = DWGS_VERTEX_SHADER_STATE;
+ break;
+ case BRW_VS_PROG:
+ aub_sub_type = DWGS_KERNEL_INSTRUCTIONS;
+ break;
+ case BRW_GS_UNIT:
+ aub_sub_type = DWGS_GEOMETRY_SHADER_STATE;
+ break;
+ case BRW_GS_PROG:
+ aub_sub_type = DWGS_KERNEL_INSTRUCTIONS;
+ break;
+ case BRW_CLIP_VP:
+ aub_sub_type = DWGS_CLIPPER_VIEWPORT_STATE;
+ break;
+ case BRW_CLIP_UNIT:
+ aub_sub_type = DWGS_CLIPPER_STATE;
+ break;
+ case BRW_CLIP_PROG:
+ aub_sub_type = DWGS_KERNEL_INSTRUCTIONS;
+ break;
+ case BRW_SS_SURFACE:
+ aub_type = DW_SURFACE_STATE;
+ aub_sub_type = DWSS_SURFACE_STATE;
+ break;
+ case BRW_SS_SURF_BIND:
+ aub_type = DW_SURFACE_STATE;
+ aub_sub_type = DWSS_BINDING_TABLE_STATE;
+ break;
+ }
+
+ xmesa_buffer_subdata_aub( iws->pipe_winsys,
+ buf,
+ offset,
+ size,
+ data,
+ aub_type,
+ aub_sub_type );
+}
+
+/**
+ * Create i965 hardware rendering context.
+ */
+struct pipe_context *
+xmesa_create_i965simple( struct pipe_winsys *winsys )
+{
+ struct aub_brw_winsys *iws = CALLOC_STRUCT( aub_brw_winsys );
+
+ /* Fill in this struct with callbacks that i965simple will need to
+ * communicate with the window system, buffer manager, etc.
+ */
+ iws->winsys.batch_start = aub_i965_batch_start;
+ iws->winsys.batch_dword = aub_i965_batch_dword;
+ iws->winsys.batch_reloc = aub_i965_batch_reloc;
+ iws->winsys.batch_flush = aub_i965_batch_flush;
+ iws->winsys.buffer_subdata_typed = aub_i965_buffer_subdata_typed;
+
+ iws->pipe_winsys = winsys;
+
+ iws->size = IWS_BATCHBUFFER_SIZE;
+
+ /* Create the i965simple context:
+ */
+ return brw_create( winsys,
+ &iws->winsys,
+ 0 );
+}
diff --git a/src/mesa/pipe/xlib/xm_winsys_aub.h b/src/mesa/pipe/xlib/xm_winsys_aub.h
new file mode 100644
index 0000000000..c0fe449107
--- /dev/null
+++ b/src/mesa/pipe/xlib/xm_winsys_aub.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef AUB_WINSYS_H
+#define AUB_WINSYS_H
+
+struct pipe_context;
+struct pipe_winsys;
+struct pipe_buffer_handle;
+struct pipe_surface;
+
+struct pipe_winsys *
+xmesa_create_pipe_winsys_aub( void );
+
+void
+xmesa_destroy_pipe_winsys_aub( struct pipe_winsys *winsys );
+
+
+
+struct pipe_context *
+xmesa_create_i965simple( struct pipe_winsys *winsys );
+
+
+
+void xmesa_buffer_subdata_aub(struct pipe_winsys *winsys,
+ struct pipe_buffer_handle *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data,
+ unsigned aub_type,
+ unsigned aub_sub_type);
+
+void xmesa_commands_aub(struct pipe_winsys *winsys,
+ unsigned *cmds,
+ unsigned nr_dwords);
+
+
+void xmesa_display_aub( /* struct pipe_winsys *winsys, */
+ struct pipe_surface *surface );
+
+struct pipe_winsys *xmesa_get_pipe_winsys_aub(void);
+
+#endif
diff --git a/src/mesa/pipe/xlib/xmesaP.h b/src/mesa/pipe/xlib/xmesaP.h
index e8c2a597a6..fa8d1f14b9 100644
--- a/src/mesa/pipe/xlib/xmesaP.h
+++ b/src/mesa/pipe/xlib/xmesaP.h
@@ -41,6 +41,12 @@ extern _glthread_Mutex _xmesa_lock;
extern XMesaBuffer XMesaBufferList;
+/*
+ */
+#define XMESA_SOFTPIPE 1
+#define XMESA_AUB 2
+extern int xmesa_mode;
+
/**
* Visual inforation, derived from GLvisual.
diff --git a/src/mesa/sources b/src/mesa/sources
index 32e1805eca..fb059498f9 100644
--- a/src/mesa/sources
+++ b/src/mesa/sources
@@ -162,7 +162,6 @@ DRAW_SOURCES = \
pipe/draw/draw_context.c\
pipe/draw/draw_cull.c \
pipe/draw/draw_debug.c \
- pipe/draw/draw_feedback.c \
pipe/draw/draw_flatshade.c \
pipe/draw/draw_offset.c \
pipe/draw/draw_prim.c \
@@ -201,7 +200,6 @@ STATETRACKER_SOURCES = \
state_tracker/st_atom.c \
state_tracker/st_atom_alphatest.c \
state_tracker/st_atom_blend.c \
- state_tracker/st_atom_clear_color.c \
state_tracker/st_atom_clip.c \
state_tracker/st_atom_constbuf.c \
state_tracker/st_atom_depth.c \
@@ -334,7 +332,9 @@ X11_DRIVER_SOURCES = \
pipe/xlib/fakeglx.c \
pipe/xlib/xfonts.c \
pipe/xlib/xm_api.c \
- pipe/xlib/xm_winsys.c
+ pipe/xlib/xm_winsys.c \
+ pipe/xlib/xm_winsys_aub.c \
+ pipe/xlib/brw_aub.c
OSMESA_DRIVER_SOURCES = \
drivers/osmesa/osmesa.c
diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 0797ea615e..bde81edd8c 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -45,7 +45,6 @@
static const struct st_tracked_state *atoms[] =
{
&st_update_framebuffer,
- &st_update_clear_color,
&st_update_depth_stencil,
&st_update_clip,
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index 3c0db0db09..0114f42ba5 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -46,7 +46,6 @@ void st_validate_state( struct st_context *st );
const struct st_tracked_state st_update_framebuffer;
const struct st_tracked_state st_update_clip;
-const struct st_tracked_state st_update_clear_color;
const struct st_tracked_state st_update_depth_stencil;
const struct st_tracked_state st_update_shader;
const struct st_tracked_state st_update_rasterizer;
diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c
index f0f360e6dd..e785434cec 100644
--- a/src/mesa/state_tracker/st_atom_depth.c
+++ b/src/mesa/state_tracker/st_atom_depth.c
@@ -101,7 +101,6 @@ update_depth_stencil(struct st_context *st)
depth_stencil.depth.enabled = st->ctx->Depth.Test;
depth_stencil.depth.writemask = st->ctx->Depth.Mask;
depth_stencil.depth.func = st_compare_func_to_pipe(st->ctx->Depth.Func);
- depth_stencil.depth.clear = st->ctx->Depth.Clear;
if (st->ctx->Query.CurrentOcclusionObject &&
st->ctx->Query.CurrentOcclusionObject->Active)
@@ -126,7 +125,6 @@ update_depth_stencil(struct st_context *st)
depth_stencil.stencil.value_mask[1] = st->ctx->Stencil.ValueMask[1] & 0xff;
depth_stencil.stencil.write_mask[1] = st->ctx->Stencil.WriteMask[1] & 0xff;
}
- depth_stencil.stencil.clear_value = st->ctx->Stencil.Clear & 0xff;
}
cso = st_cached_depth_stencil_state(st, &depth_stencil);
diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c
index 67a9159069..052b6dd144 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -172,26 +172,13 @@ update_samplers(struct st_context *st)
st->pipe->bind_sampler_state(st->pipe, u, cso->data);
}
}
-
-
- /* mapping from sampler vars to texture units */
- {
- struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current;
- uint sample_units[PIPE_MAX_SAMPLERS];
- uint s;
- for (s = 0; s < PIPE_MAX_SAMPLERS; s++) {
- sample_units[s] = fprog->Base.SamplerUnits[s];
- }
-
- st->pipe->set_sampler_units(st->pipe, PIPE_MAX_SAMPLERS, sample_units);
- }
}
const struct st_tracked_state st_update_sampler = {
.name = "st_update_sampler",
.dirty = {
- .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
+ .mesa = _NEW_TEXTURE,
.st = 0,
},
.update = update_samplers
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index c4e5af02d5..fb21d29c40 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -46,20 +46,24 @@
static void
update_textures(struct st_context *st)
{
- GLuint u;
+ GLuint s;
- for (u = 0; u < st->ctx->Const.MaxTextureImageUnits; u++) {
+ /* ST_NEW_FRAGMENT_PROGRAM
+ */
+ struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current;
+
+ for (s = 0; s < st->ctx->Const.MaxTextureCoordUnits; s++) {
+ GLuint su = fprog->Base.SamplerUnits[s];
+
struct gl_texture_object *texObj
- = st->ctx->Texture.Unit[u]._Current;
+ = st->ctx->Texture.Unit[su]._Current;
+
struct pipe_texture *pt;
+
if (texObj) {
GLboolean flush, retval;
- retval = st_finalize_texture(st->ctx, st->pipe, u, &flush);
-#if 0
- printf("finalize_texture returned %d, flush = %d\n",
- retval, flush);
-#endif
+ retval = st_finalize_texture(st->ctx, st->pipe, texObj, &flush);
pt = st_get_texobj_texture(texObj);
}
@@ -67,8 +71,14 @@ update_textures(struct st_context *st)
pt = NULL;
}
- st->state.texture[u] = pt;
- st->pipe->set_texture_state(st->pipe, u, pt);
+ /* XXX: need to ensure that textures are unbound/removed from
+ * this table before being deleted, otherwise the pointer
+ * comparison below could fail.
+ */
+ if (st->state.sampler_texture[s] != pt) {
+ st->state.sampler_texture[s] = pt;
+ st->pipe->set_sampler_texture(st->pipe, s, pt);
+ }
}
}
@@ -77,7 +87,7 @@ const struct st_tracked_state st_update_texture = {
.name = "st_update_texture",
.dirty = {
.mesa = _NEW_TEXTURE,
- .st = 0,
+ .st = ST_NEW_FRAGMENT_PROGRAM,
},
.update = update_textures
};
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 0179000353..0bc48b7039 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -469,8 +469,8 @@ make_texture(struct st_context *st,
assert(pipeFormat);
cpp = st_sizeof_format(pipeFormat);
- pt = st_texture_create(st, PIPE_TEXTURE_2D, pipeFormat, baseFormat, 0, 0,
- width, height, 1, 0);
+ pt = st_texture_create(st, PIPE_TEXTURE_2D, pipeFormat, 0, 0, width, height,
+ 1, 0);
if (!pt)
return NULL;
@@ -697,7 +697,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
/* texture state: */
{
- pipe->set_texture_state(pipe, unit, pt);
+ pipe->set_sampler_texture(pipe, unit, pt);
}
/* Compute window coords (y=0=bottom) with pixel zoom.
@@ -719,7 +719,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
pipe->bind_rasterizer_state(pipe, ctx->st->state.rasterizer->data);
pipe->bind_fs_state(pipe, ctx->st->state.fs->data);
pipe->bind_vs_state(pipe, ctx->st->state.vs->data);
- pipe->set_texture_state(pipe, unit, ctx->st->state.texture[unit]);
+ pipe->set_sampler_texture(pipe, unit, ctx->st->state.sampler_texture[unit]);
pipe->bind_sampler_state(pipe, unit, ctx->st->state.sampler[unit]->data);
pipe->set_viewport_state(pipe, &ctx->st->state.viewport);
}
@@ -987,7 +987,6 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height,
struct pipe_context *pipe = ctx->st->pipe;
struct pipe_surface *surface;
uint format = 0, cpp, comp;
- GLenum internal_format;
ubyte *dest;
struct pipe_texture *pt;
int row, col;
@@ -995,13 +994,11 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height,
/* find a texture format we know */
if (pipe->is_format_supported( pipe, PIPE_FORMAT_U_I8, PIPE_TEXTURE )) {
format = PIPE_FORMAT_U_I8;
- internal_format = GL_INTENSITY8;
cpp = 1;
comp = 0;
}
else if (pipe->is_format_supported( pipe, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_TEXTURE )) {
format = PIPE_FORMAT_A8R8G8B8_UNORM;
- internal_format = GL_RGBA8;
cpp = 4;
comp = 3; /* alpha channel */ /*XXX little-endian dependency */
}
@@ -1013,8 +1010,8 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height,
/**
* Create a texture.
*/
- pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, internal_format,
- 0, 0, width, height, 1, 0);
+ pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, 0, width, height,
+ 1, 0);
if (!pt)
return NULL;
@@ -1237,8 +1234,8 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
psRead = rbRead->surface;
format = psRead->format;
- pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format,
- rbRead->Base.InternalFormat, 0, 0, width, height, 1, 0);
+ pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, 0, width, height,
+ 1, 0);
if (!pt)
return;
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 6e9e7e3a24..047de412e3 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -90,29 +90,15 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
uint type = strb->screenSurface ? PIPE_SCREEN_SURFACE : PIPE_SURFACE;
const enum pipe_format pipeFormat
= st_choose_pipe_format(pipe, internalFormat, GL_NONE, GL_NONE, type);
- GLuint cpp;
GLbitfield flags = 0x0; /* XXX needed? */
- cpp = init_renderbuffer_bits(strb, pipeFormat);
- assert(cpp);
-
- if (strb->surface && strb->surface->format != pipeFormat) {
- /* need to change surface types, free this surface */
- pipe_surface_reference(&strb->surface, NULL);
- assert(strb->surface == NULL);
- }
-
if (!strb->surface) {
- strb->surface = pipe->winsys->surface_alloc(pipe->winsys, pipeFormat);
+ strb->surface = pipe->winsys->surface_alloc(pipe->winsys);
assert(strb->surface);
if (!strb->surface)
return GL_FALSE;
- strb->surface->cpp = cpp;
}
- strb->surface->pitch = pipe->winsys->surface_pitch(pipe->winsys, cpp,
- width, flags);
-
/* loop here since mapping is refcounted */
while (strb->surface->map)
pipe_surface_unmap(strb->surface);
@@ -120,19 +106,24 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
pipe->winsys->buffer_reference(pipe->winsys, &strb->surface->buffer,
NULL);
- strb->surface->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, 0, 0);
+ pipe->winsys->surface_alloc_storage(pipe->winsys,
+ strb->surface,
+ width,
+ height,
+ pipeFormat,
+ flags);
if (!strb->surface->buffer)
return GL_FALSE; /* out of memory, try s/w buffer? */
- pipe->winsys->buffer_data(pipe->winsys, strb->surface->buffer,
- strb->surface->pitch * cpp * height, NULL,
- PIPE_BUFFER_USAGE_PIXEL);
-
ASSERT(strb->surface->buffer);
ASSERT(strb->surface->format);
+ ASSERT(strb->surface->cpp);
+ ASSERT(strb->surface->width == width);
+ ASSERT(strb->surface->height == height);
+ ASSERT(strb->surface->pitch);
- strb->Base.Width = strb->surface->width = width;
- strb->Base.Height = strb->surface->height = height;
+ strb->Base.Width = width;
+ strb->Base.Height = height;
return GL_TRUE;
}
diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c
index 5b95dd7fd3..c1d0d086b4 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -47,7 +47,7 @@
struct st_query_object
{
struct gl_query_object base;
- struct pipe_query_object pq;
+ struct pipe_query *pq;
};
@@ -68,12 +68,28 @@ st_NewQueryObject(GLcontext *ctx, GLuint id)
if (stq) {
stq->base.Id = id;
stq->base.Ready = GL_TRUE;
+ stq->pq = NULL;
return &stq->base;
}
return NULL;
}
+
+static void
+st_DeleteQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct pipe_context *pipe = ctx->st->pipe;
+ struct st_query_object *stq = st_query_object(q);
+
+ if (stq->pq) {
+ pipe->destroy_query(pipe, stq->pq);
+ stq->pq = NULL;
+ }
+
+ FREE(stq);
+}
+
/**
* Do glReadPixels by getting rows from the framebuffer surface with
* get_tile(). Convert to requested format/type with Mesa image routines.
@@ -85,25 +101,17 @@ st_BeginQuery(GLcontext *ctx, struct gl_query_object *q)
struct pipe_context *pipe = ctx->st->pipe;
struct st_query_object *stq = st_query_object(q);
- stq->pq.count = 0;
-
switch (q->Target) {
case GL_SAMPLES_PASSED_ARB:
- stq->pq.type = PIPE_QUERY_OCCLUSION_COUNTER;
- break;
- case GL_PRIMITIVES_GENERATED_NV:
- /* someday */
- stq->pq.type = PIPE_QUERY_PRIMITIVES_GENERATED;
- break;
- case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN_NV:
- /* someday */
- stq->pq.type = PIPE_QUERY_PRIMITIVES_EMITTED;
+ if (!stq->pq)
+ stq->pq = pipe->create_query( pipe, PIPE_QUERY_OCCLUSION_COUNTER );
break;
default:
assert(0);
+ return;
}
- pipe->begin_query(pipe, &stq->pq);
+ pipe->begin_query(pipe, stq->pq);
}
@@ -113,10 +121,7 @@ st_EndQuery(GLcontext *ctx, struct gl_query_object *q)
struct pipe_context *pipe = ctx->st->pipe;
struct st_query_object *stq = st_query_object(q);
- pipe->end_query(pipe, &stq->pq);
- stq->base.Ready = stq->pq.ready;
- if (stq->base.Ready)
- stq->base.Result = stq->pq.count;
+ pipe->end_query(pipe, stq->pq);
}
@@ -129,17 +134,42 @@ st_WaitQuery(GLcontext *ctx, struct gl_query_object *q)
/* this function should only be called if we don't have a ready result */
assert(!stq->base.Ready);
- pipe->wait_query(pipe, &stq->pq);
+ while (!stq->base.Ready &&
+ !pipe->get_query_result(pipe,
+ stq->pq,
+ TRUE,
+ &q->Result))
+ {
+ /* nothing */
+ }
+
q->Ready = GL_TRUE;
- q->Result = stq->pq.count;
}
+static void
+st_CheckQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct pipe_context *pipe = ctx->st->pipe;
+ struct st_query_object *stq = st_query_object(q);
+
+ if (!q->Ready) {
+ q->Ready = pipe->get_query_result(pipe,
+ stq->pq,
+ FALSE,
+ &q->Result);
+ }
+}
+
+
+
void st_init_query_functions(struct dd_function_table *functions)
{
functions->NewQueryObject = st_NewQueryObject;
+ functions->DeleteQuery = st_DeleteQuery;
functions->BeginQuery = st_BeginQuery;
functions->EndQuery = st_EndQuery;
functions->WaitQuery = st_WaitQuery;
+ functions->CheckQuery = st_CheckQuery;
}
diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c
index 5279cb1cd4..852cff6490 100644
--- a/src/mesa/state_tracker/st_cb_rasterpos.c
+++ b/src/mesa/state_tracker/st_cb_rasterpos.c
@@ -173,6 +173,7 @@ static struct rastpos_stage *
new_draw_rastpos_stage(GLcontext *ctx, struct draw_context *draw)
{
struct rastpos_stage *rs = CALLOC_STRUCT(rastpos_stage);
+ GLuint i;
rs->stage.draw = draw;
rs->stage.next = NULL;
@@ -184,6 +185,26 @@ new_draw_rastpos_stage(GLcontext *ctx, struct draw_context *draw)
rs->stage.reset_stipple_counter = rastpos_reset_stipple_counter;
rs->ctx = ctx;
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ rs->array[i].Size = 4;
+ rs->array[i].Type = GL_FLOAT;
+ rs->array[i].Stride = 0;
+ rs->array[i].StrideB = 0;
+ rs->array[i].Ptr = (GLubyte *) ctx->Current.Attrib[i];
+ rs->array[i].Enabled = GL_TRUE;
+ rs->array[i].Normalized = GL_TRUE;
+ rs->array[i].BufferObj = NULL;
+ rs->arrays[i] = &rs->array[i];
+ }
+
+ rs->prim.mode = GL_POINTS;
+ rs->prim.indexed = 0;
+ rs->prim.begin = 1;
+ rs->prim.end = 1;
+ rs->prim.weak = 0;
+ rs->prim.start = 0;
+ rs->prim.count = 1;
+
return rs;
}
@@ -201,31 +222,8 @@ st_RasterPos(GLcontext *ctx, const GLfloat v[4])
}
else {
/* create rastpos draw stage */
- GLuint i;
-
rs = new_draw_rastpos_stage(ctx, draw);
st->rastpos_stage = &rs->stage;
-
- /* one-time init */
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- rs->array[i].Size = 4;
- rs->array[i].Type = GL_FLOAT;
- rs->array[i].Stride = 0;
- rs->array[i].StrideB = 0;
- rs->array[i].Ptr = (GLubyte *) ctx->Current.Attrib[i];
- rs->array[i].Enabled = GL_TRUE;
- rs->array[i].Normalized = GL_TRUE;
- rs->array[i].BufferObj = NULL;
- rs->arrays[i] = &rs->array[i];
- }
-
- rs->prim.mode = GL_POINTS;
- rs->prim.indexed = 0;
- rs->prim.begin = 1;
- rs->prim.end = 1;
- rs->prim.weak = 0;
- rs->prim.start = 0;
- rs->prim.count = 1;
}
/* plug our rastpos stage into the draw module */
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 14f8d38def..e813bdb47a 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -366,7 +366,6 @@ guess_and_alloc_texture(struct st_context *st,
stObj->pt = st_texture_create(st,
gl_target_to_pipe(stObj->base.Target),
st_mesa_format_to_pipe_format(stImage->base.TexFormat->MesaFormat),
- stImage->base.InternalFormat,
firstLevel,
lastLevel,
width,
@@ -487,18 +486,6 @@ try_pbo_upload(GLcontext *ctx,
-static GLboolean
-try_pbo_zcopy(GLcontext *ctx,
- struct st_texture_image *stImage,
- const struct gl_pixelstore_attrib *unpack,
- GLint internalFormat,
- GLint width, GLint height,
- GLenum format, GLenum type, const void *pixels)
-{
- return GL_FALSE;
-}
-
-
@@ -618,24 +605,6 @@ st_TexImage(GLcontext * ctx,
DBG("trying pbo upload\n");
- /* Attempt to texture directly from PBO data (zero copy upload).
- *
- * Currently disable as it can lead to worse as well as better
- * performance (in particular when pipe_region_cow() is
- * required).
- */
- if (stObj->pt == stImage->pt &&
- stObj->pt->first_level == level &&
- stObj->pt->last_level == level) {
-
- if (try_pbo_zcopy(intel, stImage, unpack,
- internalFormat,
- width, height, format, type, pixels)) {
-
- DBG("pbo zcopy upload succeeded\n");
- return;
- }
- }
/* Otherwise, attempt to use the blitter for PBO image uploads.
@@ -652,7 +621,6 @@ st_TexImage(GLcontext * ctx,
#else
(void) try_pbo_upload;
(void) check_pbo_format;
- (void) try_pbo_zcopy;
#endif
@@ -1443,10 +1411,10 @@ copy_image_data_to_texture(struct st_context *st,
*/
GLboolean
st_finalize_texture(GLcontext *ctx,
- struct pipe_context *pipe, GLuint unit,
+ struct pipe_context *pipe,
+ struct gl_texture_object *tObj,
GLboolean *needFlush)
{
- struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct st_texture_object *stObj = st_texture_object(tObj);
int comp_byte = 0;
int cpp;
@@ -1512,7 +1480,8 @@ st_finalize_texture(GLcontext *ctx,
*/
if (stObj->pt &&
(stObj->pt->target != gl_target_to_pipe(stObj->base.Target) ||
- stObj->pt->internal_format != firstImage->base.InternalFormat ||
+ stObj->pt->format !=
+ st_mesa_format_to_pipe_format(firstImage->base.TexFormat->MesaFormat) ||
stObj->pt->first_level != stObj->firstLevel ||
stObj->pt->last_level != stObj->lastLevel ||
stObj->pt->width[0] != firstImage->base.Width ||
@@ -1530,7 +1499,6 @@ st_finalize_texture(GLcontext *ctx,
stObj->pt = st_texture_create(ctx->st,
gl_target_to_pipe(stObj->base.Target),
st_mesa_format_to_pipe_format(firstImage->base.TexFormat->MesaFormat),
- firstImage->base.InternalFormat,
stObj->firstLevel,
stObj->lastLevel,
firstImage->base.Width,
diff --git a/src/mesa/state_tracker/st_cb_texture.h b/src/mesa/state_tracker/st_cb_texture.h
index 7f8082b029..878256ec26 100644
--- a/src/mesa/state_tracker/st_cb_texture.h
+++ b/src/mesa/state_tracker/st_cb_texture.h
@@ -8,7 +8,8 @@ st_get_texobj_texture(struct gl_texture_object *texObj);
extern GLboolean
st_finalize_texture(GLcontext *ctx,
- struct pipe_context *pipe, GLuint unit,
+ struct pipe_context *pipe,
+ struct gl_texture_object *tObj,
GLboolean *needFlush);
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index c31b76c63f..87646b3c71 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -103,12 +103,10 @@ struct st_context
const struct cso_vertex_shader *vs;
struct pipe_blend_color blend_color;
- struct pipe_clear_color_state clear_color;
struct pipe_clip_state clip;
struct pipe_constant_buffer constants[2];
- struct pipe_feedback_state feedback;
struct pipe_framebuffer_state framebuffer;
- struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS];
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
struct pipe_viewport_state viewport;
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 24bb4823bc..274ae86a3e 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -199,6 +199,7 @@ static void
create_default_attribs_buffer(struct st_context *st)
{
struct pipe_context *pipe = st->pipe;
+ /* XXX don't hardcode magic 32 here */
st->default_attrib_buffer = pipe->winsys->buffer_create( pipe->winsys, 32, 0, 0 );
}
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index c292a975f3..98cc2084f0 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -254,8 +254,12 @@ st_mesa_format_to_pipe_format(GLuint mesaFormat)
case MESA_FORMAT_ARGB8888_REV:
case MESA_FORMAT_ARGB8888:
return PIPE_FORMAT_A8R8G8B8_UNORM;
+ case MESA_FORMAT_ARGB1555:
+ return PIPE_FORMAT_A1R5G5B5_UNORM;
case MESA_FORMAT_ARGB4444:
return PIPE_FORMAT_A4R4G4B4_UNORM;
+ case MESA_FORMAT_RGB565:
+ return PIPE_FORMAT_R5G6B5_UNORM;
case MESA_FORMAT_AL88:
return PIPE_FORMAT_U_A8_L8;
case MESA_FORMAT_A8:
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index 1ec4514873..c7d28eeca2 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -63,7 +63,6 @@ struct pipe_texture *
st_texture_create(struct st_context *st,
unsigned target,
unsigned format,
- GLenum internal_format,
GLuint first_level,
GLuint last_level,
GLuint width0,
@@ -77,7 +76,7 @@ st_texture_create(struct st_context *st,
DBG("%s target %s format %s level %d..%d\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(target),
- _mesa_lookup_enum_by_nr(internal_format), first_level, last_level);
+ _mesa_lookup_enum_by_nr(format), first_level, last_level);
if (!pt)
return NULL;
@@ -86,7 +85,6 @@ st_texture_create(struct st_context *st,
pt->target = target;
pt->format = format;
- pt->internal_format = internal_format;
pt->first_level = first_level;
pt->last_level = last_level;
pt->width[0] = width0;
@@ -119,7 +117,7 @@ st_texture_match_image(struct pipe_texture *pt,
if (image->Border)
return GL_FALSE;
- if (image->InternalFormat != pt->internal_format ||
+ if (st_mesa_format_to_pipe_format(image->TexFormat->MesaFormat) != pt->format ||
image->IsCompressed != pt->compressed)
return GL_FALSE;
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index 2be53abf3a..7524c219e0 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -39,7 +39,6 @@ extern struct pipe_texture *
st_texture_create(struct st_context *st,
unsigned target,
unsigned format,
- GLenum internal_format,
GLuint first_level,
GLuint last_level,
GLuint width0,