summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBen Skeggs <skeggsb@gmail.com>2008-03-19 22:51:17 +1100
committerBen Skeggs <skeggsb@gmail.com>2008-03-19 22:51:17 +1100
commit62767cf2dd1006621ecd6023b15d65b5cff41dfa (patch)
treefe287d4a281884467531d6ac53a29539f1fcd18a /src
parent176df85568992a5d99aab7f0b1e382d41459aa13 (diff)
parentec890533c2852fa62366d449e6fbc899fb0498be (diff)
Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
Diffstat (limited to 'src')
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c36
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h13
-rw-r--r--src/gallium/auxiliary/draw/draw_passthrough.c349
-rw-r--r--src/gallium/auxiliary/draw/draw_prim.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_vbuf.h6
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c16
-rw-r--r--src/gallium/auxiliary/util/Makefile4
-rw-r--r--src/gallium/auxiliary/util/SConscript4
-rw-r--r--src/gallium/auxiliary/util/u_blit.c276
-rw-r--r--src/gallium/auxiliary/util/u_blit.h60
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.c112
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.h37
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c877
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.h52
-rw-r--r--src/gallium/auxiliary/util/u_handle_table.c30
-rw-r--r--src/gallium/auxiliary/util/u_handle_table.h9
-rw-r--r--src/gallium/auxiliary/util/u_hash_table.c23
-rw-r--r--src/gallium/auxiliary/util/u_hash_table.h5
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h133
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.c263
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.h52
-rw-r--r--src/gallium/drivers/cell/common.h10
-rw-r--r--src/gallium/drivers/cell/ppu/Makefile1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h25
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.c37
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c21
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_per_fragment.c1075
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_per_fragment.h35
-rw-r--r--src/gallium/drivers/cell/spu/Makefile1
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c47
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h16
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c211
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.h32
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c4
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.h4
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c23
-rw-r--r--src/gallium/drivers/cell/spu/spu_ztest.h135
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_vbuf.c12
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c86
-rw-r--r--src/gallium/include/pipe/p_debug.h10
-rw-r--r--src/gallium/include/pipe/p_util.h16
-rw-r--r--src/mesa/shader/arbprogparse.c8
-rw-r--r--src/mesa/sources1
-rw-r--r--src/mesa/state_tracker/st_cb_blit.c125
-rw-r--r--src/mesa/state_tracker/st_cb_blit.h46
-rw-r--r--src/mesa/state_tracker/st_cb_clear.c63
-rw-r--r--src/mesa/state_tracker/st_context.c5
-rw-r--r--src/mesa/state_tracker/st_context.h17
-rw-r--r--src/mesa/state_tracker/st_extensions.c1
-rw-r--r--src/mesa/state_tracker/st_gen_mipmap.c226
-rw-r--r--src/mesa/state_tracker/st_gen_mipmap.h2
-rw-r--r--src/mesa/tnl/t_vertex_sse.c4
-rw-r--r--src/mesa/vf/vf_sse.c2
53 files changed, 4069 insertions, 592 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index fd86bfaca9..294ac82281 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -149,6 +149,12 @@ void cso_set_blend(struct cso_context *ctx,
}
}
+void cso_unset_blend(struct cso_context *ctx)
+{
+ ctx->blend = NULL;
+}
+
+
void cso_single_sampler(struct cso_context *ctx,
unsigned idx,
const struct pipe_sampler_state *templ)
@@ -220,6 +226,15 @@ void cso_set_samplers( struct cso_context *ctx,
cso_single_sampler_done( ctx );
}
+void cso_unset_samplers( struct cso_context *ctx )
+{
+ uint i;
+ for (i = 0; i < ctx->nr_samplers; i++)
+ ctx->samplers[i] = NULL;
+}
+
+
+
void cso_set_depth_stencil_alpha(struct cso_context *ctx,
const struct pipe_depth_stencil_alpha_state *templ)
{
@@ -252,6 +267,11 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx,
}
}
+void cso_unset_depth_stencil_alpha(struct cso_context *ctx)
+{
+ ctx->depth_stencil = NULL;
+}
+
void cso_set_rasterizer(struct cso_context *ctx,
@@ -285,7 +305,10 @@ void cso_set_rasterizer(struct cso_context *ctx,
}
}
-
+void cso_unset_rasterizer(struct cso_context *ctx)
+{
+ ctx->rasterizer = NULL;
+}
@@ -320,6 +343,12 @@ void cso_set_fragment_shader(struct cso_context *ctx,
}
}
+void cso_unset_fragment_shader(struct cso_context *ctx)
+{
+ ctx->fragment_shader = NULL;
+}
+
+
void cso_set_vertex_shader(struct cso_context *ctx,
const struct pipe_shader_state *templ)
{
@@ -350,3 +379,8 @@ void cso_set_vertex_shader(struct cso_context *ctx,
ctx->pipe->bind_vs_state(ctx->pipe, handle);
}
}
+
+void cso_unset_vertex_shader(struct cso_context *ctx)
+{
+ ctx->vertex_shader = NULL;
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index 1f2a630804..6aa619abf5 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -44,16 +44,25 @@ struct cso_context *cso_create_context( struct pipe_context *pipe );
void cso_set_blend( struct cso_context *cso,
const struct pipe_blend_state *blend );
+void cso_unset_blend(struct cso_context *cso);
+
void cso_set_depth_stencil_alpha( struct cso_context *cso,
const struct pipe_depth_stencil_alpha_state *dsa );
+void cso_unset_depth_stencil_alpha( struct cso_context *cso );
+
void cso_set_rasterizer( struct cso_context *cso,
const struct pipe_rasterizer_state *rasterizer );
+void cso_unset_rasterizer( struct cso_context *cso );
+
void cso_set_samplers( struct cso_context *cso,
unsigned count,
const struct pipe_sampler_state **states );
+void cso_unset_samplers( struct cso_context *cso );
+
+
/* Alternate interface to support state trackers that like to modify
* samplers one at a time:
*/
@@ -72,9 +81,13 @@ void cso_single_sampler_done( struct cso_context *cso );
void cso_set_fragment_shader( struct cso_context *cso,
const struct pipe_shader_state *shader );
+void cso_unset_fragment_shader( struct cso_context *cso );
+
void cso_set_vertex_shader( struct cso_context *cso,
const struct pipe_shader_state *shader );
+void cso_unset_vertex_shader( struct cso_context *cso );
+
void cso_destroy_context( struct cso_context *cso );
diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c
index d16f056191..fdec6a591b 100644
--- a/src/gallium/auxiliary/draw/draw_passthrough.c
+++ b/src/gallium/auxiliary/draw/draw_passthrough.c
@@ -85,14 +85,52 @@ fetch_store_general( struct draw_context *draw,
const unsigned *pitch = draw->vertex_fetch.pitch;
const ubyte **src = draw->vertex_fetch.src_ptr;
- for (i = start; i < count; i++) {
+ for (i = start; i < start + count; i++) {
for (j = 0; j < nr_attrs; j++) {
+ /* vinfo->src_index is the output of the vertex shader
+ * matching this hw-vertex component.
+ *
+ * In passthrough, we require a 1:1 mapping between vertex
+ * shader outputs and inputs, which in turn correspond to
+ * vertex elements in the state. So, this is the vertex
+ * element we're interested in...
+ */
const uint jj = vinfo->src_index[j];
const enum pipe_format srcFormat = draw->vertex_element[jj].src_format;
const ubyte *from = src[jj] + i * pitch[jj];
float attrib[4];
+ /* Except... When we're not. Two cases EMIT_HEADER &
+ * EMIT_1F_PSIZE don't consume an input. Should have some
+ * method for indicating this, or change the logic here
+ * somewhat so it doesn't matter.
+ *
+ * Just hack this up now, do something better about it later.
+ */
+ if (vinfo->emit[j] == EMIT_HEADER) {
+ memset(out, 0, sizeof(struct vertex_header));
+ out += sizeof(struct vertex_header) / 4;
+ continue;
+ }
+ else if (vinfo->emit[j] == EMIT_1F_PSIZE) {
+ out[0] = 1.0; /* xxx */
+ out += 1;
+ continue;
+ }
+
+
+ /* The normal fetch/emit code:
+ */
switch (srcFormat) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ {
+ ubyte *ub = (ubyte *) from;
+ attrib[0] = UBYTE_TO_FLOAT(ub[0]);
+ attrib[1] = UBYTE_TO_FLOAT(ub[1]);
+ attrib[2] = UBYTE_TO_FLOAT(ub[2]);
+ attrib[3] = UBYTE_TO_FLOAT(ub[3]);
+ }
+ break;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
{
float *f = (float *) from;
@@ -130,14 +168,21 @@ fetch_store_general( struct draw_context *draw,
}
break;
default:
- abort();
+ assert(0);
}
- /* XXX this will probably only work for softpipe */
+ debug_printf("attrib %d: %f %f %f %f\n", j,
+ attrib[0], attrib[1], attrib[2], attrib[3]);
+
switch (vinfo->emit[j]) {
- case EMIT_HEADER:
- memset(out, 0, sizeof(struct vertex_header));
- out += sizeof(struct vertex_header) / 4;
+ case EMIT_1F:
+ out[0] = attrib[0];
+ out += 1;
+ break;
+ case EMIT_2F:
+ out[0] = attrib[0];
+ out[1] = attrib[1];
+ out += 2;
break;
case EMIT_4F:
out[0] = attrib[0];
@@ -147,64 +192,15 @@ fetch_store_general( struct draw_context *draw,
out += 4;
break;
default:
- abort();
+ assert(0);
}
-
}
+ debug_printf("\n");
}
}
-/* Example of a fetch/emit passthrough shader which could be
- * generated when bypass_clipping is enabled on a passthrough vertex
- * shader.
- */
-static void fetch_xyz_rgb_st( struct draw_context *draw,
- float *out,
- unsigned start,
- unsigned count )
-{
- const unsigned *pitch = draw->vertex_fetch.pitch;
- const ubyte **src = draw->vertex_fetch.src_ptr;
- unsigned i;
-
- const ubyte *xyzw = src[0] + start * pitch[0];
- const ubyte *rgba = src[1] + start * pitch[1];
- const ubyte *st = src[2] + start * pitch[2];
-
- /* loop over vertex attributes (vertex shader inputs)
- */
- for (i = 0; i < count; i++) {
- {
- const float *in = (const float *)xyzw; xyzw += pitch[0];
- /* decode input, encode output. Assume both are float[4] */
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
- }
-
- {
- const float *in = (const float *)rgba; rgba += pitch[1];
- /* decode input, encode output. Assume both are float[4] */
- out[4] = in[0];
- out[5] = in[1];
- out[6] = in[2];
- out[7] = in[3];
- }
-
- {
- const float *in = (const float *)st; st += pitch[2];
- /* decode input, encode output. Assume both are float[2] */
- out[8] = in[0];
- out[9] = in[1];
- }
-
- out += 10;
- }
-}
-
static boolean update_shader( struct draw_context *draw )
{
const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render);
@@ -229,70 +225,166 @@ static boolean update_shader( struct draw_context *draw )
draw->pt.hw_vertex_size = vinfo->size * 4;
- /* Just trying to figure out how this would work:
- */
- if (draw->rasterizer->bypass_vs ||
- (nr_attrs == 3 && 0 /* some other tests */))
- {
-#if 0
- draw->vertex_fetch.pt_fetch = fetch_xyz_rgb_st;
-#else
- draw->vertex_fetch.pt_fetch = fetch_store_general;
-#endif
- /*assert(vinfo->size == 10);*/
+ draw->vertex_fetch.pt_fetch = fetch_store_general;
+ return TRUE;
+}
+
+
+
+
+static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr)
+{
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ *first = 1;
+ *incr = 1;
+ return TRUE;
+ case PIPE_PRIM_LINES:
+ *first = 2;
+ *incr = 2;
return TRUE;
+ case PIPE_PRIM_LINE_STRIP:
+ *first = 2;
+ *incr = 1;
+ return TRUE;
+ case PIPE_PRIM_TRIANGLES:
+ *first = 3;
+ *incr = 3;
+ return TRUE;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ *first = 3;
+ *incr = 1;
+ return TRUE;
+ case PIPE_PRIM_QUADS:
+ *first = 4;
+ *incr = 4;
+ return TRUE;
+ case PIPE_PRIM_QUAD_STRIP:
+ *first = 4;
+ *incr = 2;
+ return TRUE;
+ default:
+ *first = 0;
+ *incr = 1; /* set to one so that count % incr works */
+ return FALSE;
}
-
- return FALSE;
}
static boolean set_prim( struct draw_context *draw,
- unsigned prim )
+ unsigned prim,
+ unsigned count )
{
assert(!draw->user.elts);
- draw->pt.prim = prim;
-
switch (prim) {
case PIPE_PRIM_LINE_LOOP:
+ if (count > 1024)
+ return FALSE;
+ return draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP );
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ if (count > 1024)
+ return FALSE;
+ return draw->render->set_primitive( draw->render, prim );
+
case PIPE_PRIM_QUADS:
case PIPE_PRIM_QUAD_STRIP:
- return FALSE;
+ return draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES );
+
default:
- draw->render->set_primitive( draw->render, prim );
- return TRUE;
+ return draw->render->set_primitive( draw->render, prim );
+ break;
}
+
+ return TRUE;
}
-boolean
-draw_passthrough_arrays(struct draw_context *draw,
- unsigned prim,
- unsigned start,
- unsigned count)
+
+#define INDEX(i) (start + (i))
+static void pt_draw_arrays( struct draw_context *draw,
+ unsigned start,
+ unsigned length )
{
- float *hw_verts;
+ ushort *tmp = NULL;
+ unsigned i, j;
- if (draw_need_pipeline(draw))
- return FALSE;
+ switch (draw->pt.prim) {
+ case PIPE_PRIM_LINE_LOOP:
+ tmp = MALLOC( sizeof(ushort) * (length + 1) );
- if (!set_prim(draw, prim))
- return FALSE;
+ for (i = 0; i < length; i++)
+ tmp[i] = INDEX(i);
+ tmp[length] = 0;
- if (!update_shader(draw))
- return FALSE;
+ draw->render->draw( draw->render,
+ tmp,
+ length+1 );
+ break;
- hw_verts = draw->render->allocate_vertices( draw->render,
- draw->pt.hw_vertex_size,
- count );
+
+ case PIPE_PRIM_QUAD_STRIP:
+ tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) );
+
+ for (j = i = 0; i + 3 < length; i += 2, j += 6) {
+ tmp[j+0] = INDEX(i+0);
+ tmp[j+1] = INDEX(i+1);
+ tmp[j+2] = INDEX(i+3);
+
+ tmp[j+3] = INDEX(i+2);
+ tmp[j+4] = INDEX(i+0);
+ tmp[j+5] = INDEX(i+3);
+ }
+
+ if (j)
+ draw->render->draw( draw->render, tmp, j );
+ break;
+
+ case PIPE_PRIM_QUADS:
+ tmp = MALLOC( sizeof(int) * (length / 4 * 6) );
+
+ for (j = i = 0; i + 3 < length; i += 4, j += 6) {
+ tmp[j+0] = INDEX(i+0);
+ tmp[j+1] = INDEX(i+1);
+ tmp[j+2] = INDEX(i+3);
+
+ tmp[j+3] = INDEX(i+1);
+ tmp[j+4] = INDEX(i+2);
+ tmp[j+5] = INDEX(i+3);
+ }
+
+ if (j)
+ draw->render->draw( draw->render, tmp, j );
+ break;
+
+ default:
+ draw->render->draw_arrays( draw->render,
+ start,
+ length );
+ break;
+ }
+
+ if (tmp)
+ FREE(tmp);
+}
+
+
+
+static boolean do_draw( struct draw_context *draw,
+ unsigned start, unsigned count )
+{
+ float *hw_verts =
+ draw->render->allocate_vertices( draw->render,
+ (ushort)draw->pt.hw_vertex_size,
+ (ushort)count );
if (!hw_verts)
return FALSE;
- /* Single routine to fetch vertices, run shader and emit HW verts.
- * Clipping and viewport transformation are done on hardware.
+ /* Single routine to fetch vertices and emit HW verts.
*/
draw->vertex_fetch.pt_fetch( draw,
hw_verts,
@@ -301,9 +393,9 @@ draw_passthrough_arrays(struct draw_context *draw,
/* Draw arrays path to avoid re-emitting index list again and
* again.
*/
- draw->render->draw_arrays( draw->render,
- start,
- count );
+ pt_draw_arrays( draw,
+ 0,
+ count );
draw->render->release_vertices( draw->render,
@@ -314,3 +406,68 @@ draw_passthrough_arrays(struct draw_context *draw,
return TRUE;
}
+
+boolean
+draw_passthrough_arrays(struct draw_context *draw,
+ unsigned prim,
+ unsigned start,
+ unsigned count)
+{
+ unsigned i = 0;
+ unsigned first, incr;
+
+ //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count);
+
+ split_prim_inplace(prim, &first, &incr);
+
+ count -= (count - first) % incr;
+
+ debug_printf("%s %d %d %d\n", __FUNCTION__, prim, start, count);
+
+ if (draw_need_pipeline(draw))
+ return FALSE;
+
+ debug_printf("%s AAA\n", __FUNCTION__);
+
+ if (!set_prim(draw, prim, count))
+ return FALSE;
+
+ /* XXX: need a single value that reflects the most recent call to
+ * driver->set_primitive:
+ */
+ draw->pt.prim = prim;
+
+ debug_printf("%s BBB\n", __FUNCTION__);
+
+ if (!update_shader(draw))
+ return FALSE;
+
+ debug_printf("%s CCC\n", __FUNCTION__);
+
+ /* Chop this up into bite-sized pieces that a driver should be able
+ * to devour -- problem is we don't have a quick way to query the
+ * driver on the maximum size for this chunk in the current state.
+ */
+ while (i + first <= count) {
+ int nr = MIN2( count - i, 1024 );
+
+ /* snap to prim boundary
+ */
+ nr -= (nr - first) % incr;
+
+ if (!do_draw( draw, start + i, nr )) {
+ assert(0);
+ return FALSE;
+ }
+
+ /* increment allowing for repeated vertices
+ */
+ i += nr - (first - incr);
+ }
+
+
+ debug_printf("%s DONE\n", __FUNCTION__);
+ return TRUE;
+}
+
+
diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c
index 888fa536ea..cb0277fb6c 100644
--- a/src/gallium/auxiliary/draw/draw_prim.c
+++ b/src/gallium/auxiliary/draw/draw_prim.c
@@ -169,11 +169,12 @@ void draw_do_flush( struct draw_context *draw, unsigned flags )
flushing = TRUE;
if (flags >= DRAW_FLUSH_SHADER_QUEUE) {
- if (draw->vs.queue_nr)
+ if (draw->vs.queue_nr) {
if (draw->rasterizer->bypass_vs)
fetch_and_store(draw);
else
(*draw->shader_queue_flush)(draw);
+ }
if (flags >= DRAW_FLUSH_PRIM_QUEUE) {
if (draw->pq.queue_nr)
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index 5e7de905c1..e90f37872a 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -74,9 +74,11 @@ struct vbuf_render {
/**
* Notify the renderer of the current primitive when it changes.
- * Prim is restricted to TRIANGLES, LINES and POINTS.
+ * Must succeed for TRIANGLES, LINES and POINTS. Other prims at
+ * the discretion of the driver, for the benefit of the passthrough
+ * path.
*/
- void (*set_primitive)( struct vbuf_render *, unsigned prim );
+ boolean (*set_primitive)( struct vbuf_render *, unsigned prim );
/**
* DrawElements, note indices are ushort:
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index a996218ce7..24be65bff9 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -316,7 +316,9 @@ void spe_init_func(struct spe_function *p, unsigned code_size)
void spe_release_func(struct spe_function *p)
{
- align_free(p->store);
+ if (p->store != NULL) {
+ align_free(p->store);
+ }
p->store = NULL;
p->csr = NULL;
}
@@ -326,8 +328,8 @@ int spe_allocate_available_register(struct spe_function *p)
{
unsigned i;
for (i = 0; i < 128; i++) {
- const uint64_t mask = (1ULL << (i % 128));
- const unsigned idx = i / 128;
+ const uint64_t mask = (1ULL << (i % 64));
+ const unsigned idx = i / 64;
if ((p->regs[idx] & mask) != 0) {
p->regs[idx] &= ~mask;
@@ -341,8 +343,8 @@ int spe_allocate_available_register(struct spe_function *p)
int spe_allocate_register(struct spe_function *p, int reg)
{
- const unsigned idx = reg / 128;
- const unsigned bit = reg % 128;
+ const unsigned idx = reg / 64;
+ const unsigned bit = reg % 64;
assert((p->regs[idx] & (1ULL << bit)) != 0);
@@ -353,8 +355,8 @@ int spe_allocate_register(struct spe_function *p, int reg)
void spe_release_register(struct spe_function *p, int reg)
{
- const unsigned idx = reg / 128;
- const unsigned bit = reg % 128;
+ const unsigned idx = reg / 64;
+ const unsigned bit = reg % 64;
assert((p->regs[idx] & (1ULL << bit)) == 0);
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
index 2016c6fb1f..9b6c2708b6 100644
--- a/src/gallium/auxiliary/util/Makefile
+++ b/src/gallium/auxiliary/util/Makefile
@@ -7,9 +7,13 @@ C_SOURCES = \
p_debug.c \
p_tile.c \
p_util.c \
+ u_blit.c \
+ u_draw_quad.c \
+ u_gen_mipmap.c \
u_handle_table.c \
u_hash_table.c \
u_mm.c \
+ u_simple_shaders.c \
u_snprintf.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
index 154a3eca8c..b44f2d5e39 100644
--- a/src/gallium/auxiliary/util/SConscript
+++ b/src/gallium/auxiliary/util/SConscript
@@ -6,9 +6,13 @@ util = env.ConvenienceLibrary(
'p_debug.c',
'p_tile.c',
'p_util.c',
+ 'u_blit.c',
+ 'u_draw_quad.c',
+ 'u_gen_mipmap.c',
'u_handle_table.c',
'u_hash_table.c',
'u_mm.c',
+ 'u_simple_shaders.c',
'u_snprintf.c',
])
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
new file mode 100644
index 0000000000..4b4ab8185f
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -0,0 +1,276 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Copy/blit pixel rect between surfaces
+ *
+ * @author Brian Paul
+ */
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_draw_quad.h"
+#include "util/u_blit.h"
+#include "util/u_simple_shaders.h"
+
+
+struct blit_state
+{
+ struct pipe_context *pipe;
+
+ void *blend;
+ void *depthstencil;
+ void *rasterizer;
+ void *samplers[2]; /* one for linear, one for nearest sampling */
+
+ /*struct pipe_viewport_state viewport;*/
+ struct pipe_sampler_state *vs;
+ struct pipe_sampler_state *fs;
+};
+
+
+/**
+ * Create state object for blit.
+ * Intended to be created once and re-used for many blit() calls.
+ */
+struct blit_state *
+util_create_blit(struct pipe_context *pipe)
+{
+ struct pipe_blend_state blend;
+ struct pipe_depth_stencil_alpha_state depthstencil;
+ struct pipe_rasterizer_state rasterizer;
+ struct blit_state *ctx;
+ struct pipe_sampler_state sampler;
+
+ ctx = CALLOC_STRUCT(blit_state);
+ if (!ctx)
+ return NULL;
+
+ ctx->pipe = pipe;
+
+ /* we don't use blending, but need to set valid values */
+ memset(&blend, 0, sizeof(blend));
+ blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
+ blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+ blend.colormask = PIPE_MASK_RGBA;
+ ctx->blend = pipe->create_blend_state(pipe, &blend);
+
+ /* depth/stencil/alpha */
+ memset(&depthstencil, 0, sizeof(depthstencil));
+ ctx->depthstencil = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil);
+
+ /* rasterizer */
+ memset(&rasterizer, 0, sizeof(rasterizer));
+ rasterizer.front_winding = PIPE_WINDING_CW;
+ rasterizer.cull_mode = PIPE_WINDING_NONE;
+ rasterizer.bypass_clipping = 1; /* bypasses viewport too */
+ /*rasterizer.bypass_vs = 1;*/
+ ctx->rasterizer = pipe->create_rasterizer_state(pipe, &rasterizer);
+
+ /* samplers */
+ memset(&sampler, 0, sizeof(sampler));
+ sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST;
+ sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST;
+ sampler.normalized_coords = 1;
+ ctx->samplers[0] = pipe->create_sampler_state(pipe, &sampler);
+
+ sampler.min_img_filter = PIPE_TEX_MIPFILTER_LINEAR;
+ sampler.mag_img_filter = PIPE_TEX_MIPFILTER_LINEAR;
+ ctx->samplers[1] = pipe->create_sampler_state(pipe, &sampler);
+
+
+#if 0
+ /* viewport */
+ ctx->viewport.scale[0] = 1.0;
+ ctx->viewport.scale[1] = 1.0;
+ ctx->viewport.scale[2] = 1.0;
+ ctx->viewport.scale[3] = 1.0;
+ ctx->viewport.translate[0] = 0.0;
+ ctx->viewport.translate[1] = 0.0;
+ ctx->viewport.translate[2] = 0.0;
+ ctx->viewport.translate[3] = 0.0;
+#endif
+
+ /* vertex shader */
+ {
+ const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_GENERIC };
+ const uint semantic_indexes[] = { 0, 0 };
+ ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
+ semantic_indexes);
+ }
+
+ /* fragment shader */
+ ctx->fs = util_make_fragment_tex_shader(pipe);
+
+ return ctx;
+}
+
+
+/**
+ * Destroy a blit context
+ */
+void
+util_destroy_blit(struct blit_state *ctx)
+{
+ struct pipe_context *pipe = ctx->pipe;
+
+ pipe->delete_blend_state(pipe, ctx->blend);
+ pipe->delete_depth_stencil_alpha_state(pipe, ctx->depthstencil);
+ pipe->delete_rasterizer_state(pipe, ctx->rasterizer);
+ pipe->delete_sampler_state(pipe, ctx->samplers[0]);
+ pipe->delete_sampler_state(pipe, ctx->samplers[1]);
+
+ pipe->delete_vs_state(pipe, ctx->vs);
+ pipe->delete_fs_state(pipe, ctx->fs);
+
+ FREE(ctx);
+}
+
+
+/**
+ * Copy pixel block from src surface to dst surface.
+ * Overlapping regions are acceptable.
+ * XXX need some control over blitting Z and/or stencil.
+ */
+void
+util_blit_pixels(struct blit_state *ctx,
+ struct pipe_surface *src,
+ int srcX0, int srcY0,
+ int srcX1, int srcY1,
+ struct pipe_surface *dst,
+ int dstX0, int dstY0,
+ int dstX1, int dstY1,
+ float z, uint filter)
+{
+ struct pipe_context *pipe = ctx->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_texture texTemp, *tex;
+ struct pipe_surface *texSurf;
+ struct pipe_framebuffer_state fb;
+ const int srcW = abs(srcX1 - srcX0);
+ const int srcH = abs(srcY1 - srcY0);
+ const int srcLeft = MIN2(srcX0, srcX1);
+ const int srcTop = MIN2(srcY0, srcY1);
+
+ assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
+ filter == PIPE_TEX_MIPFILTER_LINEAR);
+
+ if (srcLeft != srcX0) {
+ /* left-right flip */
+ int tmp = dstX0;
+ dstX0 = dstX1;
+ dstX1 = tmp;
+ }
+
+ if (srcTop != srcY0) {
+ /* up-down flip */
+ int tmp = dstY0;
+ dstY0 = dstY1;
+ dstY1 = tmp;
+ }
+
+ /*
+ * XXX for now we're always creating a temporary texture.
+ * Strictly speaking that's not always needed.
+ */
+
+ /* create temp texture */
+ memset(&texTemp, 0, sizeof(texTemp));
+ texTemp.target = PIPE_TEXTURE_2D;
+ texTemp.format = src->format;
+ texTemp.last_level = 0;
+ texTemp.width[0] = srcW;
+ texTemp.height[0] = srcH;
+ texTemp.depth[0] = 1;
+ texTemp.compressed = 0;
+ texTemp.cpp = pf_get_bits(src->format) / 8;
+
+ tex = screen->texture_create(screen, &texTemp);
+ if (!tex)
+ return;
+
+ texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0);
+
+ /* load temp texture */
+ pipe->surface_copy(pipe, FALSE,
+ texSurf, 0, 0, /* dest */
+ src, srcLeft, srcTop, /* src */
+ srcW, srcH); /* size */
+
+ /* drawing dest */
+ memset(&fb, 0, sizeof(fb));
+ fb.num_cbufs = 1;
+ fb.cbufs[0] = dst;
+ pipe->set_framebuffer_state(pipe, &fb);
+
+ /* sampler */
+ if (filter == PIPE_TEX_MIPFILTER_NEAREST)
+ pipe->bind_sampler_states(pipe, 1, &ctx->samplers[0]);
+ else
+ pipe->bind_sampler_states(pipe, 1, &ctx->samplers[1]);
+
+ /* texture */
+ pipe->set_sampler_textures(pipe, 1, &tex);
+
+ /* shaders */
+ pipe->bind_fs_state(pipe, ctx->fs);
+ pipe->bind_vs_state(pipe, ctx->vs);
+
+ /* misc state */
+ pipe->bind_blend_state(pipe, ctx->blend);
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->depthstencil);
+ pipe->bind_rasterizer_state(pipe, ctx->rasterizer);
+
+ /* draw quad */
+ util_draw_texquad(pipe, dstX0, dstY0, dstX1, dstY1, z);
+
+ /* unbind */
+ pipe->set_sampler_textures(pipe, 0, NULL);
+ pipe->bind_sampler_states(pipe, 0, NULL);
+
+ /* free stuff */
+ pipe_surface_reference(&texSurf, NULL);
+ screen->texture_release(screen, &tex);
+
+ /* Note: caller must restore pipe/gallium state at this time */
+}
+
diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h
new file mode 100644
index 0000000000..a349be99ad
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_blit.h
@@ -0,0 +1,60 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef U_BLIT_H
+#define U_BLIT_H
+
+
+struct pipe_context;
+struct pipe_surface;
+
+
+struct blit_state;
+
+
+extern struct blit_state *
+util_create_blit(struct pipe_context *pipe);
+
+
+extern void
+util_destroy_blit(struct blit_state *ctx);
+
+
+
+extern void
+util_blit_pixels(struct blit_state *ctx,
+ struct pipe_surface *src,
+ int srcX0, int srcY0,
+ int srcX1, int srcY1,
+ struct pipe_surface *dst,
+ int dstX0, int dstY0,
+ int dstX1, int dstY1,
+ float z, uint filter);
+
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
new file mode 100644
index 0000000000..79a69de633
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -0,0 +1,112 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_draw_quad.h"
+
+
+/**
+ * Draw a screen-aligned textured quad as a four-vertex triangle fan.
+ *
+ * Every vertex carries two float[4] attributes: a position (x, y, z, 1)
+ * followed by a texcoord (s, t, 0, 1).  The texcoords run from (0, 0) at
+ * (x0, y0) to (1, 1) at (x1, y1).  A temporary vertex buffer is created,
+ * filled, drawn from and released again on every call.
+ *
+ * If the vertex buffer cannot be created or mapped nothing is drawn.
+ *
+ * \param pipe  context to draw with
+ * \param x0  x position of the corner that gets texcoord (0, 0)
+ * \param y0  y position of the corner that gets texcoord (0, 0)
+ * \param x1  x position of the corner that gets texcoord (1, 1)
+ * \param y1  y position of the corner that gets texcoord (1, 1)
+ * \param z  z value written to all four vertices
+ */
+void
+util_draw_texquad(struct pipe_context *pipe,
+                  float x0, float y0, float x1, float y1, float z)
+{
+   struct pipe_buffer *vbuf;
+   struct pipe_vertex_buffer vbuffer;
+   struct pipe_vertex_element velement;
+   uint numAttribs = 2, vertexBytes, i, j;
+   float *v;
+
+   /* 4 vertices, each with numAttribs attributes of 4 floats */
+   vertexBytes = 4 * (4 * numAttribs * sizeof(float));
+
+   /* XXX create one-time */
+   vbuf = pipe->winsys->buffer_create(pipe->winsys, 32,
+                                      PIPE_BUFFER_USAGE_VERTEX, vertexBytes);
+   assert(vbuf);
+   if (!vbuf) {
+      /* assert() may be compiled out; don't map a NULL buffer */
+      return;
+   }
+
+   v = (float *) pipe->winsys->buffer_map(pipe->winsys, vbuf,
+                                          PIPE_BUFFER_USAGE_CPU_WRITE);
+   if (!v) {
+      /* mapping failed: drop the buffer again instead of writing to NULL */
+      pipe_buffer_reference(pipe->winsys, &vbuf, NULL);
+      return;
+   }
+
+   /*
+    * Load vertex buffer: first the values shared by all four vertices
+    * (8 floats per vertex), then the per-corner position and texcoord.
+    */
+   for (i = j = 0; i < 4; i++) {
+      v[j + 2] = z;   /* z */
+      v[j + 3] = 1.0; /* w */
+      v[j + 6] = 0.0; /* r */
+      v[j + 7] = 1.0; /* q */
+      j += 8;
+   }
+
+   v[0] = x0;
+   v[1] = y0;
+   v[4] = 0.0; /*s*/
+   v[5] = 0.0; /*t*/
+
+   v[8] = x1;
+   v[9] = y0;
+   v[12] = 1.0;
+   v[13] = 0.0;
+
+   v[16] = x1;
+   v[17] = y1;
+   v[20] = 1.0;
+   v[21] = 1.0;
+
+   v[24] = x0;
+   v[25] = y1;
+   v[28] = 0.0;
+   v[29] = 1.0;
+
+   pipe->winsys->buffer_unmap(pipe->winsys, vbuf);
+
+   /* tell pipe about the vertex buffer */
+   vbuffer.buffer = vbuf;
+   vbuffer.pitch = numAttribs * 4 * sizeof(float);  /* vertex size */
+   vbuffer.buffer_offset = 0;
+   pipe->set_vertex_buffer(pipe, 0, &vbuffer);
+
+   /* tell pipe about the vertex attributes */
+   for (i = 0; i < numAttribs; i++) {
+      velement.src_offset = i * 4 * sizeof(float);
+      velement.vertex_buffer_index = 0;
+      velement.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      velement.nr_components = 4;
+      pipe->set_vertex_element(pipe, i, &velement);
+   }
+
+   /* draw */
+   pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, 0, 4);
+
+   /* XXX: do one-time */
+   pipe_buffer_reference(pipe->winsys, &vbuf, NULL);
+}
diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h
new file mode 100644
index 0000000000..a97f55d2ef
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_draw_quad.h
@@ -0,0 +1,37 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DRAWQUAD_H
+#define U_DRAWQUAD_H
+
+
+/*
+ * Only used through a pointer here, so a forward declaration is enough.
+ * Without it "struct pipe_context" in the prototype below would get
+ * prototype scope whenever this header is included before the header
+ * that defines the struct.
+ */
+struct pipe_context;
+
+
+/**
+ * Draw a screen-aligned textured quad with corners (x0, y0) and (x1, y1)
+ * at depth z, using the given pipe context.
+ */
+extern void
+util_draw_texquad(struct pipe_context *pipe,
+                  float x0, float y0, float x1, float y1, float z);
+
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
new file mode 100644
index 0000000000..e18f8ab72a
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -0,0 +1,877 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Mipmap generation utility
+ *
+ * @author Brian Paul
+ */
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_draw_quad.h"
+#include "util/u_gen_mipmap.h"
+#include "util/u_simple_shaders.h"
+
+#include "tgsi/util/tgsi_build.h"
+#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/util/tgsi_parse.h"
+
+
+/**
+ * State needed to generate mipmaps by rendering.  Filled in by
+ * util_create_gen_mipmap() and released by util_destroy_gen_mipmap().
+ */
+struct gen_mipmap_state
+{
+   struct pipe_context *pipe;   /* context the objects below were created on */
+
+   void *blend;                 /* from pipe->create_blend_state() */
+   void *depthstencil;          /* from pipe->create_depth_stencil_alpha_state() */
+   void *rasterizer;            /* from pipe->create_rasterizer_state() */
+   /*struct pipe_viewport_state viewport;*/
+   /*
+    * Shader handles.  They are only ever passed to bind_vs_state() /
+    * bind_fs_state() and the matching delete calls, so they are kept as
+    * untyped handles like the other state objects rather than as
+    * sampler-state pointers.
+    */
+   void *vs;
+   void *fs;
+};
+
+
+/**
+ * Texel storage layouts that do_row() dispatches on.  The packed
+ * variants keep all channels of a texel in one ubyte/ushort; the others
+ * store every component in an element of its own.
+ */
+enum dtype
+{
+ UBYTE, /* one ubyte per component */
+ UBYTE_3_3_2, /* packed ubyte; do_row() averages bits 0-1, 2-4 and 5-7 */
+ USHORT, /* one ushort per component */
+ USHORT_4_4_4_4, /* packed ushort; four 4-bit fields */
+ USHORT_5_6_5, /* packed ushort; bits 0-4, 5-10 and 11-15 */
+ USHORT_1_5_5_5_REV, /* packed ushort; bits 0-4, 5-9, 10-14 and bit 15 */
+ UINT, /* one uint per component; do_row() handles comps == 1 only */
+ FLOAT, /* one float per component */
+ HALF_FLOAT /* do_row() support is currently compiled out (#if 0) */
+};
+
+/* Storage type for HALF_FLOAT components. */
+typedef ushort half_float;
+
+
+#if 0 /* Disabled: these conversion helpers are only referenced by the
+       * HALF_FLOAT paths of do_row(), which are compiled out as well. */
+extern half_float
+float_to_half(float f);
+
+extern float
+half_to_float(half_float h);
+#endif
+
+
+/**
+ * Average together two rows of a source image to produce a single new
+ * row in the dest image.  It's legal for the two source rows to point
+ * to the same data.  The source width is expected to be equal to either
+ * the dest width or two times the dest width.
+ *
+ * When srcWidth == dstWidth the two column indexes coincide and the
+ * column stride is 1, so only the two rows are blended.  Otherwise each
+ * dest texel is the average of two adjacent columns in both rows.
+ *
+ * \param datatype  storage layout of the texels (see enum dtype)
+ * \param comps  number of components per pixel (1..4)
+ * \param srcWidth  width of the source rows, in texels
+ * \param srcRowA  first source row
+ * \param srcRowB  second source row (may be the same as srcRowA)
+ * \param dstWidth  number of texels to produce
+ * \param dstRow  destination row
+ *
+ * Unsupported datatype/comps combinations write nothing and are reported
+ * through debug_printf().
+ */
+static void
+do_row(enum dtype datatype, uint comps, int srcWidth,
+       const void *srcRowA, const void *srcRowB,
+       int dstWidth, void *dstRow)
+{
+   /* k0 is the offset of the second sampled column relative to the first */
+   const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
+   const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
+
+   assert(comps >= 1);
+   assert(comps <= 4);
+
+   /* This assertion is no longer valid with non-power-of-2 textures
+   assert(srcWidth == dstWidth || srcWidth == 2 * dstWidth);
+   */
+
+   if (datatype == UBYTE && comps == 4) {
+      uint i, j, k;
+      const ubyte(*rowA)[4] = (const ubyte(*)[4]) srcRowA;
+      const ubyte(*rowB)[4] = (const ubyte(*)[4]) srcRowB;
+      ubyte(*dst)[4] = (ubyte(*)[4]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
+         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
+         dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
+         dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
+      }
+   }
+   else if (datatype == UBYTE && comps == 3) {
+      uint i, j, k;
+      const ubyte(*rowA)[3] = (const ubyte(*)[3]) srcRowA;
+      const ubyte(*rowB)[3] = (const ubyte(*)[3]) srcRowB;
+      ubyte(*dst)[3] = (ubyte(*)[3]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
+         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
+         dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
+      }
+   }
+   else if (datatype == UBYTE && comps == 2) {
+      uint i, j, k;
+      const ubyte(*rowA)[2] = (const ubyte(*)[2]) srcRowA;
+      const ubyte(*rowB)[2] = (const ubyte(*)[2]) srcRowB;
+      ubyte(*dst)[2] = (ubyte(*)[2]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) >> 2;
+         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) >> 2;
+      }
+   }
+   else if (datatype == UBYTE && comps == 1) {
+      uint i, j, k;
+      const ubyte *rowA = (const ubyte *) srcRowA;
+      const ubyte *rowB = (const ubyte *) srcRowB;
+      ubyte *dst = (ubyte *) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) >> 2;
+      }
+   }
+
+   else if (datatype == USHORT && comps == 4) {
+      uint i, j, k;
+      const ushort(*rowA)[4] = (const ushort(*)[4]) srcRowA;
+      const ushort(*rowB)[4] = (const ushort(*)[4]) srcRowB;
+      ushort(*dst)[4] = (ushort(*)[4]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
+         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
+         dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
+         dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
+      }
+   }
+   else if (datatype == USHORT && comps == 3) {
+      uint i, j, k;
+      const ushort(*rowA)[3] = (const ushort(*)[3]) srcRowA;
+      const ushort(*rowB)[3] = (const ushort(*)[3]) srcRowB;
+      ushort(*dst)[3] = (ushort(*)[3]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
+         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
+         dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
+      }
+   }
+   else if (datatype == USHORT && comps == 2) {
+      uint i, j, k;
+      const ushort(*rowA)[2] = (const ushort(*)[2]) srcRowA;
+      const ushort(*rowB)[2] = (const ushort(*)[2]) srcRowB;
+      ushort(*dst)[2] = (ushort(*)[2]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
+         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
+      }
+   }
+   else if (datatype == USHORT && comps == 1) {
+      uint i, j, k;
+      const ushort *rowA = (const ushort *) srcRowA;
+      const ushort *rowB = (const ushort *) srcRowB;
+      ushort *dst = (ushort *) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4;
+      }
+   }
+
+   else if (datatype == FLOAT && comps == 4) {
+      uint i, j, k;
+      const float(*rowA)[4] = (const float(*)[4]) srcRowA;
+      const float(*rowB)[4] = (const float(*)[4]) srcRowB;
+      float(*dst)[4] = (float(*)[4]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i][0] = (rowA[j][0] + rowA[k][0] +
+                      rowB[j][0] + rowB[k][0]) * 0.25F;
+         dst[i][1] = (rowA[j][1] + rowA[k][1] +
+                      rowB[j][1] + rowB[k][1]) * 0.25F;
+         dst[i][2] = (rowA[j][2] + rowA[k][2] +
+                      rowB[j][2] + rowB[k][2]) * 0.25F;
+         dst[i][3] = (rowA[j][3] + rowA[k][3] +
+                      rowB[j][3] + rowB[k][3]) * 0.25F;
+      }
+   }
+   else if (datatype == FLOAT && comps == 3) {
+      uint i, j, k;
+      const float(*rowA)[3] = (const float(*)[3]) srcRowA;
+      const float(*rowB)[3] = (const float(*)[3]) srcRowB;
+      float(*dst)[3] = (float(*)[3]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i][0] = (rowA[j][0] + rowA[k][0] +
+                      rowB[j][0] + rowB[k][0]) * 0.25F;
+         dst[i][1] = (rowA[j][1] + rowA[k][1] +
+                      rowB[j][1] + rowB[k][1]) * 0.25F;
+         dst[i][2] = (rowA[j][2] + rowA[k][2] +
+                      rowB[j][2] + rowB[k][2]) * 0.25F;
+      }
+   }
+   else if (datatype == FLOAT && comps == 2) {
+      uint i, j, k;
+      const float(*rowA)[2] = (const float(*)[2]) srcRowA;
+      const float(*rowB)[2] = (const float(*)[2]) srcRowB;
+      float(*dst)[2] = (float(*)[2]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i][0] = (rowA[j][0] + rowA[k][0] +
+                      rowB[j][0] + rowB[k][0]) * 0.25F;
+         dst[i][1] = (rowA[j][1] + rowA[k][1] +
+                      rowB[j][1] + rowB[k][1]) * 0.25F;
+      }
+   }
+   else if (datatype == FLOAT && comps == 1) {
+      uint i, j, k;
+      const float *rowA = (const float *) srcRowA;
+      const float *rowB = (const float *) srcRowB;
+      float *dst = (float *) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) * 0.25F;
+      }
+   }
+
+#if 0
+   else if (datatype == HALF_FLOAT && comps == 4) {
+      uint i, j, k, comp;
+      const half_float(*rowA)[4] = (const half_float(*)[4]) srcRowA;
+      const half_float(*rowB)[4] = (const half_float(*)[4]) srcRowB;
+      half_float(*dst)[4] = (half_float(*)[4]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         for (comp = 0; comp < 4; comp++) {
+            float aj, ak, bj, bk;
+            aj = half_to_float(rowA[j][comp]);
+            ak = half_to_float(rowA[k][comp]);
+            bj = half_to_float(rowB[j][comp]);
+            bk = half_to_float(rowB[k][comp]);
+            dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
+         }
+      }
+   }
+   else if (datatype == HALF_FLOAT && comps == 3) {
+      uint i, j, k, comp;
+      const half_float(*rowA)[3] = (const half_float(*)[3]) srcRowA;
+      const half_float(*rowB)[3] = (const half_float(*)[3]) srcRowB;
+      half_float(*dst)[3] = (half_float(*)[3]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         for (comp = 0; comp < 3; comp++) {
+            float aj, ak, bj, bk;
+            aj = half_to_float(rowA[j][comp]);
+            ak = half_to_float(rowA[k][comp]);
+            bj = half_to_float(rowB[j][comp]);
+            bk = half_to_float(rowB[k][comp]);
+            dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
+         }
+      }
+   }
+   else if (datatype == HALF_FLOAT && comps == 2) {
+      uint i, j, k, comp;
+      const half_float(*rowA)[2] = (const half_float(*)[2]) srcRowA;
+      const half_float(*rowB)[2] = (const half_float(*)[2]) srcRowB;
+      half_float(*dst)[2] = (half_float(*)[2]) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         for (comp = 0; comp < 2; comp++) {
+            float aj, ak, bj, bk;
+            aj = half_to_float(rowA[j][comp]);
+            ak = half_to_float(rowA[k][comp]);
+            bj = half_to_float(rowB[j][comp]);
+            bk = half_to_float(rowB[k][comp]);
+            dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
+         }
+      }
+   }
+   else if (datatype == HALF_FLOAT && comps == 1) {
+      uint i, j, k;
+      const half_float *rowA = (const half_float *) srcRowA;
+      const half_float *rowB = (const half_float *) srcRowB;
+      half_float *dst = (half_float *) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         float aj, ak, bj, bk;
+         aj = half_to_float(rowA[j]);
+         ak = half_to_float(rowA[k]);
+         bj = half_to_float(rowB[j]);
+         bk = half_to_float(rowB[k]);
+         dst[i] = float_to_half((aj + ak + bj + bk) * 0.25F);
+      }
+   }
+#endif
+
+   else if (datatype == UINT && comps == 1) {
+      uint i, j, k;
+      const uint *rowA = (const uint *) srcRowA;
+      const uint *rowB = (const uint *) srcRowB;
+      uint *dst = (uint *) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         /* divide before summing so the uint sum cannot overflow */
+         dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4;
+      }
+   }
+
+   else if (datatype == USHORT_5_6_5 && comps == 3) {
+      uint i, j, k;
+      const ushort *rowA = (const ushort *) srcRowA;
+      const ushort *rowB = (const ushort *) srcRowB;
+      ushort *dst = (ushort *) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const int rowAr0 = rowA[j] & 0x1f;
+         const int rowAr1 = rowA[k] & 0x1f;
+         const int rowBr0 = rowB[j] & 0x1f;
+         const int rowBr1 = rowB[k] & 0x1f;
+         const int rowAg0 = (rowA[j] >> 5) & 0x3f;
+         const int rowAg1 = (rowA[k] >> 5) & 0x3f;
+         const int rowBg0 = (rowB[j] >> 5) & 0x3f;
+         const int rowBg1 = (rowB[k] >> 5) & 0x3f;
+         const int rowAb0 = (rowA[j] >> 11) & 0x1f;
+         const int rowAb1 = (rowA[k] >> 11) & 0x1f;
+         const int rowBb0 = (rowB[j] >> 11) & 0x1f;
+         const int rowBb1 = (rowB[k] >> 11) & 0x1f;
+         const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
+         const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
+         const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
+         dst[i] = (blue << 11) | (green << 5) | red;
+      }
+   }
+   else if (datatype == USHORT_4_4_4_4 && comps == 4) {
+      uint i, j, k;
+      const ushort *rowA = (const ushort *) srcRowA;
+      const ushort *rowB = (const ushort *) srcRowB;
+      ushort *dst = (ushort *) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const int rowAr0 = rowA[j] & 0xf;
+         const int rowAr1 = rowA[k] & 0xf;
+         const int rowBr0 = rowB[j] & 0xf;
+         const int rowBr1 = rowB[k] & 0xf;
+         const int rowAg0 = (rowA[j] >> 4) & 0xf;
+         const int rowAg1 = (rowA[k] >> 4) & 0xf;
+         const int rowBg0 = (rowB[j] >> 4) & 0xf;
+         const int rowBg1 = (rowB[k] >> 4) & 0xf;
+         const int rowAb0 = (rowA[j] >> 8) & 0xf;
+         const int rowAb1 = (rowA[k] >> 8) & 0xf;
+         const int rowBb0 = (rowB[j] >> 8) & 0xf;
+         const int rowBb1 = (rowB[k] >> 8) & 0xf;
+         const int rowAa0 = (rowA[j] >> 12) & 0xf;
+         const int rowAa1 = (rowA[k] >> 12) & 0xf;
+         const int rowBa0 = (rowB[j] >> 12) & 0xf;
+         const int rowBa1 = (rowB[k] >> 12) & 0xf;
+         const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
+         const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
+         const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
+         const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
+         dst[i] = (alpha << 12) | (blue << 8) | (green << 4) | red;
+      }
+   }
+   else if (datatype == USHORT_1_5_5_5_REV && comps == 4) {
+      uint i, j, k;
+      const ushort *rowA = (const ushort *) srcRowA;
+      const ushort *rowB = (const ushort *) srcRowB;
+      ushort *dst = (ushort *) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const int rowAr0 = rowA[j] & 0x1f;
+         const int rowAr1 = rowA[k] & 0x1f;
+         const int rowBr0 = rowB[j] & 0x1f;
+         /* all four samples of the low field are 5 bits wide */
+         const int rowBr1 = rowB[k] & 0x1f;
+         const int rowAg0 = (rowA[j] >> 5) & 0x1f;
+         const int rowAg1 = (rowA[k] >> 5) & 0x1f;
+         const int rowBg0 = (rowB[j] >> 5) & 0x1f;
+         const int rowBg1 = (rowB[k] >> 5) & 0x1f;
+         const int rowAb0 = (rowA[j] >> 10) & 0x1f;
+         const int rowAb1 = (rowA[k] >> 10) & 0x1f;
+         const int rowBb0 = (rowB[j] >> 10) & 0x1f;
+         const int rowBb1 = (rowB[k] >> 10) & 0x1f;
+         const int rowAa0 = (rowA[j] >> 15) & 0x1;
+         const int rowAa1 = (rowA[k] >> 15) & 0x1;
+         const int rowBa0 = (rowB[j] >> 15) & 0x1;
+         const int rowBa1 = (rowB[k] >> 15) & 0x1;
+         const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
+         const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
+         const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
+         /* 1-bit field: the result is 1 only if all four samples are 1 */
+         const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
+         dst[i] = (alpha << 15) | (blue << 10) | (green << 5) | red;
+      }
+   }
+   else if (datatype == UBYTE_3_3_2 && comps == 3) {
+      uint i, j, k;
+      const ubyte *rowA = (const ubyte *) srcRowA;
+      const ubyte *rowB = (const ubyte *) srcRowB;
+      ubyte *dst = (ubyte *) dstRow;
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const int rowAr0 = rowA[j] & 0x3;
+         const int rowAr1 = rowA[k] & 0x3;
+         const int rowBr0 = rowB[j] & 0x3;
+         const int rowBr1 = rowB[k] & 0x3;
+         const int rowAg0 = (rowA[j] >> 2) & 0x7;
+         const int rowAg1 = (rowA[k] >> 2) & 0x7;
+         const int rowBg0 = (rowB[j] >> 2) & 0x7;
+         const int rowBg1 = (rowB[k] >> 2) & 0x7;
+         const int rowAb0 = (rowA[j] >> 5) & 0x7;
+         const int rowAb1 = (rowA[k] >> 5) & 0x7;
+         const int rowBb0 = (rowB[j] >> 5) & 0x7;
+         const int rowBb1 = (rowB[k] >> 5) & 0x7;
+         const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
+         const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
+         const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
+         dst[i] = (blue << 5) | (green << 2) | red;
+      }
+   }
+   else {
+      debug_printf("bad format in do_row()");
+   }
+}
+
+
+/**
+ * Translate a pipe format into the (datatype, component count) pair that
+ * do_row() dispatches on.
+ *
+ * \param pformat  format to translate
+ * \param datatype  returns the texel storage layout
+ * \param comps  returns the number of components per texel
+ *
+ * Formats not listed here trip the assertion.  When assertions are
+ * compiled out the outputs are still written (UBYTE with zero
+ * components, a combination do_row() rejects as a bad format) so that
+ * callers never read uninitialized values.
+ */
+static void
+format_to_type_comps(enum pipe_format pformat,
+                     enum dtype *datatype, uint *comps)
+{
+   switch (pformat) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      *datatype = UBYTE;
+      *comps = 4;
+      return;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      *datatype = USHORT_1_5_5_5_REV;
+      *comps = 4;
+      return;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      *datatype = USHORT_4_4_4_4;
+      *comps = 4;
+      return;
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      *datatype = USHORT_5_6_5;
+      *comps = 3;
+      return;
+   case PIPE_FORMAT_U_L8:
+   case PIPE_FORMAT_U_A8:
+   case PIPE_FORMAT_U_I8:
+      *datatype = UBYTE;
+      *comps = 1;
+      return;
+   case PIPE_FORMAT_U_A8_L8:
+      *datatype = UBYTE;
+      *comps = 2;
+      return;
+   default:
+      assert(0);
+      /* no do_row() branch matches comps == 0 */
+      *datatype = UBYTE;
+      *comps = 0;
+      return;
+   }
+}
+
+
+/**
+ * Reduce one row of texels of the given format from srcWidth to dstWidth.
+ */
+static void
+reduce_1d(enum pipe_format pformat,
+          int srcWidth, const ubyte *srcPtr,
+          int dstWidth, ubyte *dstPtr)
+{
+   uint numComps;
+   enum dtype type;
+
+   format_to_type_comps(pformat, &type, &numComps);
+
+   /*
+    * A 1D image has no second row to blend with, so the single source
+    * row is handed to do_row() as both of its inputs.
+    */
+   do_row(type, numComps, srcWidth, srcPtr, srcPtr, dstWidth, dstPtr);
+}
+
+
+/**
+ * Reduce a 2D image to the dest size, producing one dest row from each
+ * pair of source rows.
+ *
+ * Row strides are given in bytes.  A stride of zero means the rows are
+ * tightly packed, i.e. width * bytes-per-texel.
+ */
+static void
+reduce_2d(enum pipe_format pformat,
+          int srcWidth, int srcHeight,
+          int srcRowStride, const ubyte *srcPtr,
+          int dstWidth, int dstHeight,
+          int dstRowStride, ubyte *dstPtr)
+{
+   const int texelBytes = pf_get_size(pformat);
+   const int srcStride = srcRowStride ? srcRowStride : texelBytes * srcWidth;
+   const int dstStride = dstRowStride ? dstRowStride : texelBytes * dstWidth;
+   /* a one-row source is blended with itself */
+   const ubyte *upper = srcPtr;
+   const ubyte *lower = (srcHeight > 1) ? srcPtr + srcStride : srcPtr;
+   ubyte *out = dstPtr;
+   enum dtype type;
+   uint numComps;
+   int y;
+
+   format_to_type_comps(pformat, &type, &numComps);
+
+   /* each dest row consumes two source rows */
+   for (y = 0; y < dstHeight; y++) {
+      do_row(type, numComps, srcWidth, upper, lower, dstWidth, out);
+      upper += 2 * srcStride;
+      lower += 2 * srcStride;
+      out += dstStride;
+   }
+}
+
+
+/**
+ * Software path for 1D textures: fill levels baseLevel+1 .. lastLevel,
+ * each one reduced from the level directly below it through CPU
+ * mappings of the surfaces' buffers.
+ */
+static void
+make_1d_mipmap(struct gen_mipmap_state *ctx,
+               struct pipe_texture *pt,
+               uint face, uint baseLevel, uint lastLevel)
+{
+   struct pipe_screen *screen = ctx->pipe->screen;
+   struct pipe_winsys *ws = ctx->pipe->winsys;
+   const uint zslice = 0;
+   uint level = baseLevel + 1;
+
+   while (level <= lastLevel) {
+      struct pipe_surface *from, *to;
+      ubyte *fromTexels, *toTexels;
+
+      from = screen->get_tex_surface(screen, pt, face, level - 1, zslice);
+      to = screen->get_tex_surface(screen, pt, face, level, zslice);
+
+      /* the surface's texels start at 'offset' bytes into its buffer */
+      fromTexels = (ubyte *) ws->buffer_map(ws, from->buffer,
+                                            PIPE_BUFFER_USAGE_CPU_READ);
+      fromTexels += from->offset;
+      toTexels = (ubyte *) ws->buffer_map(ws, to->buffer,
+                                          PIPE_BUFFER_USAGE_CPU_WRITE);
+      toTexels += to->offset;
+
+      reduce_1d(pt->format, from->width, fromTexels, to->width, toTexels);
+
+      ws->buffer_unmap(ws, from->buffer);
+      ws->buffer_unmap(ws, to->buffer);
+
+      pipe_surface_reference(&from, NULL);
+      pipe_surface_reference(&to, NULL);
+
+      level++;
+   }
+}
+
+
+/**
+ * Software path for 2D textures and cube faces: fill levels
+ * baseLevel+1 .. lastLevel, each one reduced from the level directly
+ * below it through CPU mappings of the surfaces' buffers.
+ */
+static void
+make_2d_mipmap(struct gen_mipmap_state *ctx,
+               struct pipe_texture *pt,
+               uint face, uint baseLevel, uint lastLevel)
+{
+   struct pipe_screen *screen = ctx->pipe->screen;
+   struct pipe_winsys *ws = ctx->pipe->winsys;
+   const uint zslice = 0;
+   const int texelBytes = pf_get_size(pt->format);
+   uint level = baseLevel + 1;
+
+   while (level <= lastLevel) {
+      struct pipe_surface *from, *to;
+      ubyte *fromTexels, *toTexels;
+
+      from = screen->get_tex_surface(screen, pt, face, level - 1, zslice);
+      to = screen->get_tex_surface(screen, pt, face, level, zslice);
+
+      /* the surface's texels start at 'offset' bytes into its buffer */
+      fromTexels = (ubyte *) ws->buffer_map(ws, from->buffer,
+                                            PIPE_BUFFER_USAGE_CPU_READ);
+      fromTexels += from->offset;
+      toTexels = (ubyte *) ws->buffer_map(ws, to->buffer,
+                                          PIPE_BUFFER_USAGE_CPU_WRITE);
+      toTexels += to->offset;
+
+      /* surface pitch is multiplied by the texel size to get byte strides */
+      reduce_2d(pt->format,
+                from->width, from->height,
+                from->pitch * texelBytes, fromTexels,
+                to->width, to->height,
+                to->pitch * texelBytes, toTexels);
+
+      ws->buffer_unmap(ws, from->buffer);
+      ws->buffer_unmap(ws, to->buffer);
+
+      pipe_surface_reference(&from, NULL);
+      pipe_surface_reference(&to, NULL);
+
+      level++;
+   }
+}
+
+/* 3D software path: the body is empty, so no mipmap levels are written. */
+static void
+make_3d_mipmap(struct gen_mipmap_state *ctx,
+ struct pipe_texture *pt,
+ uint face, uint baseLevel, uint lastLevel)
+{
+}
+
+
+/**
+ * Software mipmap generation: pick the reduction routine that matches
+ * the texture target.  Cube faces are handled like 2D images.
+ */
+static void
+fallback_gen_mipmap(struct gen_mipmap_state *ctx,
+                    struct pipe_texture *pt,
+                    uint face, uint baseLevel, uint lastLevel)
+{
+   if (pt->target == PIPE_TEXTURE_1D) {
+      make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
+   }
+   else if (pt->target == PIPE_TEXTURE_2D ||
+            pt->target == PIPE_TEXTURE_CUBE) {
+      make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
+   }
+   else if (pt->target == PIPE_TEXTURE_3D) {
+      make_3d_mipmap(ctx, pt, face, baseLevel, lastLevel);
+   }
+   else {
+      /* unknown texture target */
+      assert(0);
+   }
+}
+
+
+/**
+ * Allocate a mipmap generation context and create the state objects it
+ * renders with.  The context is meant to be created once and then
+ * passed to every util_gen_mipmap() call.
+ *
+ * \param pipe  context to create the state objects on
+ * \return the new context, or NULL if it could not be allocated
+ */
+struct gen_mipmap_state *
+util_create_gen_mipmap(struct pipe_context *pipe)
+{
+   struct pipe_blend_state blendTempl;
+   struct pipe_depth_stencil_alpha_state dsaTempl;
+   struct pipe_rasterizer_state rastTempl;
+   struct gen_mipmap_state *state = CALLOC_STRUCT(gen_mipmap_state);
+
+   if (!state)
+      return NULL;
+
+   state->pipe = pipe;
+
+   /* blending is unused, but the factors still have to be valid */
+   memset(&blendTempl, 0, sizeof(blendTempl));
+   blendTempl.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   blendTempl.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blendTempl.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
+   blendTempl.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+   blendTempl.colormask = PIPE_MASK_RGBA;
+   state->blend = pipe->create_blend_state(pipe, &blendTempl);
+
+   /* depth/stencil/alpha: everything left at zero */
+   memset(&dsaTempl, 0, sizeof(dsaTempl));
+   state->depthstencil =
+      pipe->create_depth_stencil_alpha_state(pipe, &dsaTempl);
+
+   /* rasterizer: no culling, clipping bypassed (bypasses viewport too) */
+   memset(&rastTempl, 0, sizeof(rastTempl));
+   rastTempl.front_winding = PIPE_WINDING_CW;
+   rastTempl.cull_mode = PIPE_WINDING_NONE;
+   rastTempl.bypass_clipping = 1;
+   /*rastTempl.bypass_vs = 1;*/
+   state->rasterizer = pipe->create_rasterizer_state(pipe, &rastTempl);
+
+#if 0
+   /* viewport */
+   state->viewport.scale[0] = 1.0;
+   state->viewport.scale[1] = 1.0;
+   state->viewport.scale[2] = 1.0;
+   state->viewport.scale[3] = 1.0;
+   state->viewport.translate[0] = 0.0;
+   state->viewport.translate[1] = 0.0;
+   state->viewport.translate[2] = 0.0;
+   state->viewport.translate[3] = 0.0;
+#endif
+
+   /* vertex shader: passes a position and one generic attribute through */
+   {
+      const uint semanticNames[] = { TGSI_SEMANTIC_POSITION,
+                                     TGSI_SEMANTIC_GENERIC };
+      const uint semanticIndexes[] = { 0, 0 };
+      state->vs = util_make_vertex_passthrough_shader(pipe, 2,
+                                                      semanticNames,
+                                                      semanticIndexes);
+   }
+
+   /* fragment shader */
+   state->fs = util_make_fragment_tex_shader(pipe);
+
+   return state;
+}
+
+
+/**
+ * Delete the state objects held by a mipmap generation context, then
+ * free the context itself.
+ */
+void
+util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
+{
+   struct pipe_context *const owner = ctx->pipe;
+
+   /* fixed-function state objects */
+   owner->delete_blend_state(owner, ctx->blend);
+   owner->delete_depth_stencil_alpha_state(owner, ctx->depthstencil);
+   owner->delete_rasterizer_state(owner, ctx->rasterizer);
+
+   /* shaders */
+   owner->delete_vs_state(owner, ctx->vs);
+   owner->delete_fs_state(owner, ctx->fs);
+
+   FREE(ctx);
+}
+
+
+#if 0 /* Disabled: sets a viewport built from width and height; its only
+       * call site in util_gen_mipmap() is under #if 0 as well. */
+static void
+simple_viewport(struct pipe_context *pipe, uint width, uint height)
+{
+ struct pipe_viewport_state vp;
+
+ vp.scale[0] = 0.5 * width;
+ vp.scale[1] = -0.5 * height;
+ vp.scale[2] = 1.0;
+ vp.scale[3] = 1.0;
+ vp.translate[0] = 0.5 * width;
+ vp.translate[1] = 0.5 * height;
+ vp.translate[2] = 0.0;
+ vp.translate[3] = 0.0;
+
+ pipe->set_viewport_state(pipe, &vp);
+}
+#endif
+
+
+/**
+ * Generate mipmap images.  It's assumed all needed texture memory is
+ * already allocated.
+ *
+ * If the texture's format cannot be rendered to, the levels are computed
+ * on the CPU instead.  Otherwise each level is produced by drawing a
+ * textured quad into it while sampling from the level below.
+ *
+ * The rendering path binds its own blend, depth/stencil, rasterizer,
+ * shader, framebuffer, sampler and texture state and does not put the
+ * previous state back: the caller must restore its pipe state (including
+ * the framebuffer) before drawing again.
+ *
+ * \param ctx  context from util_create_gen_mipmap()
+ * \param pt  the texture to generate mipmap levels for
+ * \param face  which cube face to generate mipmaps for (0 for non-cube maps)
+ * \param baseLevel  the first mipmap level to use as a src
+ * \param lastLevel  the last mipmap level to generate
+ */
+void
+util_gen_mipmap(struct gen_mipmap_state *ctx,
+                struct pipe_texture *pt,
+                uint face, uint baseLevel, uint lastLevel)
+{
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_framebuffer_state fb;
+   struct pipe_sampler_state sampler;
+   void *sampler_cso;
+   uint dstLevel;
+   uint zslice = 0;
+
+   /* check if we can render in the texture's format */
+   if (!screen->is_format_supported(screen, pt->format, PIPE_SURFACE)) {
+      fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel);
+      return;
+   }
+
+   /* init framebuffer state */
+   memset(&fb, 0, sizeof(fb));
+   fb.num_cbufs = 1;
+
+   /* sampler state */
+   memset(&sampler, 0, sizeof(sampler));
+   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
+   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.normalized_coords = 1;
+
+   /* bind our state */
+   pipe->bind_blend_state(pipe, ctx->blend);
+   pipe->bind_depth_stencil_alpha_state(pipe, ctx->depthstencil);
+   pipe->bind_rasterizer_state(pipe, ctx->rasterizer);
+   pipe->bind_vs_state(pipe, ctx->vs);
+   pipe->bind_fs_state(pipe, ctx->fs);
+#if 0
+   pipe->set_viewport_state(pipe, &ctx->viewport);
+#endif
+
+   /*
+    * XXX for small mipmap levels, it may be faster to use the software
+    * fallback path...
+    */
+   for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+      const uint srcLevel = dstLevel - 1;
+      struct pipe_surface *dstSurf;
+
+      /*
+       * Setup framebuffer / dest surface
+       */
+      dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice);
+      fb.cbufs[0] = dstSurf;
+      pipe->set_framebuffer_state(pipe, &fb);
+
+      /*
+       * Setup sampler state
+       * Note: we should only have to set the min/max LOD clamps to ensure
+       * we grab texels from the right mipmap level.  But some hardware
+       * has trouble with min clamping so we also set the lod_bias to
+       * try to work around that.
+       */
+      sampler.min_lod = sampler.max_lod = (float) srcLevel;
+      sampler.lod_bias = (float) srcLevel;
+      sampler_cso = pipe->create_sampler_state(pipe, &sampler);
+      pipe->bind_sampler_states(pipe, 1, &sampler_cso);
+
+#if 0
+      simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]);
+#endif
+
+      pipe->set_sampler_textures(pipe, 1, &pt);
+
+      /* quad coords in window coords (bypassing clipping, viewport mapping) */
+      util_draw_texquad(pipe,
+                        0.0F, 0.0F,                   /* x0, y0 */
+                        (float) pt->width[dstLevel],  /* x1 */
+                        (float) pt->height[dstLevel], /* y1 */
+                        0.0F);                        /* z */
+
+
+      pipe->flush(pipe, PIPE_FLUSH_WAIT);
+
+      /*pipe->texture_update(pipe, pt);  not really needed */
+
+      pipe->delete_sampler_state(pipe, sampler_cso);
+
+      /*
+       * Rendering to this level has been flushed, so drop the reference
+       * get_tex_surface() gave us; otherwise one surface is leaked per
+       * generated level.
+       */
+      fb.cbufs[0] = NULL;
+      pipe_surface_reference(&dstSurf, NULL);
+   }
+
+   /* Note: caller must restore pipe/gallium state at this time */
+}
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h
new file mode 100644
index 0000000000..80496140a2
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.h
@@ -0,0 +1,52 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_GENMIPMAP_H
+#define U_GENMIPMAP_H
+
+#include "pipe/p_state.h"
+
+
+struct gen_mipmap_state;
+
+
+/**
+ * Create the state object that util_gen_mipmap() takes as its first
+ * argument.
+ * NOTE(review): presumably released with util_destroy_gen_mipmap() --
+ * confirm against the implementation.
+ */
+extern struct gen_mipmap_state *
+util_create_gen_mipmap(struct pipe_context *pipe);
+
+
+/**
+ * Destroy a state object obtained from util_create_gen_mipmap().
+ */
+extern void
+util_destroy_gen_mipmap(struct gen_mipmap_state *ctx);
+
+
+
+/**
+ * Generate mipmap levels baseLevel+1 through lastLevel of one face of
+ * \p pt, each level being rendered from the level just before it.
+ *
+ * The implementation binds its own blend, depth/stencil, rasterizer,
+ * shader, sampler and framebuffer state; the caller must restore
+ * pipe/gallium state afterwards.
+ */
+extern void
+util_gen_mipmap(struct gen_mipmap_state *ctx,
+ struct pipe_texture *pt,
+ uint face, uint baseLevel, uint lastLevel);
+
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c
index d25872972a..0bfb9e1b4a 100644
--- a/src/gallium/auxiliary/util/u_handle_table.c
+++ b/src/gallium/auxiliary/util/u_handle_table.c
@@ -226,9 +226,13 @@ handle_table_remove(struct handle_table *ht,
index = handle - 1;
object = ht->objects[index];
- assert(object);
+ if(!object) {
+ /* XXX: this warning may be noisy for legitimate use -- remove later */
+ debug_warning("removing empty handle");
+ return;
+ }
- if(object && ht->destroy)
+ if(ht->destroy)
ht->destroy(object);
ht->objects[index] = NULL;
@@ -237,6 +241,28 @@ handle_table_remove(struct handle_table *ht,
}
+/**
+ * Return the next handle after \p handle that refers to an object.
+ *
+ * Handles are 1-based (handle == slot index + 1), so the slot that follows
+ * \p handle has index == handle.  Passing 0 therefore starts the search at
+ * the first slot.
+ *
+ * \param ht      table to search
+ * \param handle  handle to continue from, or 0 to start from the beginning
+ * \return the next handle whose slot holds an object, or 0 if there is none
+ */
+unsigned
+handle_table_get_next_handle(struct handle_table *ht,
+                             unsigned handle)
+{
+   unsigned index;
+
+   for(index = handle; index < ht->size; ++index) {
+      /* Only occupied slots are valid handles; empty ones are skipped. */
+      if(ht->objects[index])
+         return index + 1;
+   }
+
+   return 0;
+}
+
+
+/**
+ * Begin an iteration over the table: equivalent to asking
+ * handle_table_get_next_handle() for the handle that follows handle 0.
+ */
+unsigned
+handle_table_get_first_handle(struct handle_table *ht)
+{
+   const unsigned before_first = 0;
+
+   return handle_table_get_next_handle(ht, before_first);
+}
+
+
void
handle_table_destroy(struct handle_table *ht)
{
diff --git a/src/gallium/auxiliary/util/u_handle_table.h b/src/gallium/auxiliary/util/u_handle_table.h
index a2f1f604ad..d080135c9f 100644
--- a/src/gallium/auxiliary/util/u_handle_table.h
+++ b/src/gallium/auxiliary/util/u_handle_table.h
@@ -100,6 +100,15 @@ void
handle_table_destroy(struct handle_table *ht);
+/**
+ * Start iterating over the table.  Same as
+ * handle_table_get_next_handle(ht, 0).
+ */
+unsigned
+handle_table_get_first_handle(struct handle_table *ht);
+
+
+/**
+ * Continue iterating from \p handle.
+ *
+ * \return a handle greater than \p handle, or 0 once the end of the table
+ * has been reached.
+ */
+unsigned
+handle_table_get_next_handle(struct handle_table *ht,
+ unsigned handle);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c
index ac2cb1b540..f3f16a8d94 100644
--- a/src/gallium/auxiliary/util/u_hash_table.c
+++ b/src/gallium/auxiliary/util/u_hash_table.c
@@ -187,6 +187,28 @@ hash_table_remove(struct hash_table *ht,
}
+/**
+ * Visit every item stored in the table.
+ *
+ * \p callback is invoked with each item's key and value plus the opaque
+ * \p data pointer.  The walk stops at the first callback that returns
+ * something other than PIPE_OK, and that value is returned to the caller.
+ *
+ * \return PIPE_OK if every callback succeeded (or the table is empty).
+ */
+enum pipe_error
+hash_table_foreach(struct hash_table *ht,
+                   enum pipe_error (*callback)(void *key, void *value, void *data),
+                   void *data)
+{
+   struct cso_hash_iter it;
+
+   for (it = cso_hash_first_node(ht->cso);
+        !cso_hash_iter_is_null(it);
+        it = cso_hash_iter_next(it)) {
+      struct hash_table_item *entry =
+         (struct hash_table_item *) cso_hash_iter_data(it);
+      const enum pipe_error status = callback(entry->key, entry->value, data);
+
+      if (status != PIPE_OK)
+         return status;
+   }
+
+   return PIPE_OK;
+}
+
+
void
hash_table_destroy(struct hash_table *ht)
{
@@ -196,4 +218,3 @@ hash_table_destroy(struct hash_table *ht)
FREE(ht);
}
-
diff --git a/src/gallium/auxiliary/util/u_hash_table.h b/src/gallium/auxiliary/util/u_hash_table.h
index d941f2c6b1..1583bd7548 100644
--- a/src/gallium/auxiliary/util/u_hash_table.h
+++ b/src/gallium/auxiliary/util/u_hash_table.h
@@ -75,6 +75,11 @@ hash_table_remove(struct hash_table *ht,
void *key);
+/**
+ * Call \p callback for each key/value pair in the table, passing \p data
+ * through unchanged.
+ *
+ * Iteration stops at the first callback that does not return PIPE_OK and
+ * that result is returned; otherwise PIPE_OK is returned.
+ */
+enum pipe_error
+hash_table_foreach(struct hash_table *ht,
+ enum pipe_error (*callback)(void *key, void *value, void *data),
+ void *data);
+
void
hash_table_destroy(struct hash_table *ht);
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
new file mode 100644
index 0000000000..cd13823985
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -0,0 +1,133 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Functions to produce packed colors/Z from floats.
+ */
+
+
+#ifndef U_PACK_COLOR_H
+#define U_PACK_COLOR_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+
+
+/**
+ * Pack a floating-point RGBA color into the pixel layout of \p format.
+ *
+ * \param rgba    color components in R, G, B, A order
+ * \param format  pixel format to produce
+ * \param dest    receives the packed pixel: one uint for the 8-bit per
+ *                channel formats, one ushort for R5G6B5, and 4 or 3 floats
+ *                for the float formats
+ *
+ * Note rgba outside [0,1] will be clamped for int pixel formats.
+ * For an unhandled format only a debug message is printed and \p dest is
+ * left untouched.
+ */
+static INLINE void
+util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
+{
+   /* Initialized so that no indeterminate value is ever read, even if a
+    * packed format with components wider than 8 bits is added below.
+    */
+   ubyte r = 0, g = 0, b = 0, a = 0;
+
+   if (pf_size_x(format) <= 8) {
+      /* format uses 8-bit components or less */
+      UNCLAMPED_FLOAT_TO_UBYTE(r, rgba[0]);
+      UNCLAMPED_FLOAT_TO_UBYTE(g, rgba[1]);
+      UNCLAMPED_FLOAT_TO_UBYTE(b, rgba[2]);
+      UNCLAMPED_FLOAT_TO_UBYTE(a, rgba[3]);
+   }
+
+   /* Components are widened to uint before shifting: a ubyte promotes to
+    * (signed) int, and shifting a value >= 0x80 left by 24 would overflow it.
+    */
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      {
+         uint *d = (uint *) dest;
+         *d = ((uint) r << 24) | ((uint) g << 16) | ((uint) b << 8) | (uint) a;
+      }
+      return;
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      {
+         uint *d = (uint *) dest;
+         *d = ((uint) a << 24) | ((uint) r << 16) | ((uint) g << 8) | (uint) b;
+      }
+      return;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      {
+         uint *d = (uint *) dest;
+         *d = ((uint) b << 24) | ((uint) g << 16) | ((uint) r << 8) | (uint) a;
+      }
+      return;
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      {
+         /* Keep the top 5/6/5 bits of red/green/blue. */
+         ushort *d = (ushort *) dest;
+         *d = (ushort) (((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3));
+      }
+      return;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      {
+         float *d = (float *) dest;
+         d[0] = rgba[0];
+         d[1] = rgba[1];
+         d[2] = rgba[2];
+         d[3] = rgba[3];
+      }
+      return;
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      {
+         float *d = (float *) dest;
+         d[0] = rgba[0];
+         d[1] = rgba[1];
+         d[2] = rgba[2];
+      }
+      return;
+   /* XXX lots more cases to add */
+   default:
+      debug_printf("gallium: unhandled format in util_pack_color()");
+   }
+}
+
+
+/**
+ * Convert a depth value to the integer representation used by \p format.
+ *
+ * \param format  depth (or depth/stencil) pixel format
+ * \param z       depth value
+ * \return the packed depth value; for PIPE_FORMAT_Z24S8_UNORM the 24 depth
+ *         bits are placed in the upper bits of the word.  Returns 0 (after
+ *         printing a debug message) for formats that are not handled.
+ *
+ * Note: it's assumed that z is in [0,1]
+ */
+static INLINE uint
+util_pack_z(enum pipe_format format, double z)
+{
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      return (uint) (z * 0xffff);
+   case PIPE_FORMAT_Z32_UNORM:
+      /* special-case to avoid overflow */
+      if (z == 1.0)
+         return 0xffffffff;
+      else
+         return (uint) (z * 0xffffffff);
+   case PIPE_FORMAT_S8Z24_UNORM:
+      return (uint) (z * 0xffffff);
+   case PIPE_FORMAT_Z24S8_UNORM:
+      return ((uint) (z * 0xffffff)) << 8;
+   default:
+      debug_printf("gallium: unhandled format in util_pack_z()");
+      return 0;
+   }
+}
+
+
+#endif /* U_PACK_COLOR_H */
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
new file mode 100644
index 0000000000..88e2ab05bd
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -0,0 +1,263 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Simple vertex/fragment shader generators.
+ *
+ * @author Brian Paul
+ */
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_simple_shaders.h"
+
+#include "tgsi/util/tgsi_build.h"
+#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/util/tgsi_parse.h"
+
+
+
+/**
+ * Make simple vertex pass-through shader.
+ *
+ * Builds a TGSI vertex program that declares \p num_attribs inputs and
+ * outputs and copies IN[i] to OUT[i] for every attribute, then hands it to
+ * pipe->create_vs_state().
+ *
+ * \param pipe              context used to create the shader state
+ * \param num_attribs       number of attributes to pass through
+ * \param semantic_names    semantic name of each output (num_attribs entries)
+ * \param semantic_indexes  semantic index of each output (num_attribs entries)
+ * \return whatever pipe->create_vs_state() returns, or NULL if the token
+ *         buffer could not be allocated
+ *
+ * NOTE(review): the token buffer has a fixed size (maxTokens); a large
+ * \p num_attribs may not fit -- confirm how tgsi_build_full_*() reports that.
+ * NOTE(review): the token buffer is not freed here; presumably the created
+ * shader keeps referencing it -- confirm ownership with the drivers.
+ */
+void *
+util_make_vertex_passthrough_shader(struct pipe_context *pipe,
+                                    uint num_attribs,
+                                    const uint *semantic_names,
+                                    const uint *semantic_indexes)
+{
+   uint maxTokens = 100;
+   struct tgsi_token *tokens;
+   struct tgsi_header *header;
+   struct tgsi_processor *processor;
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+   const uint procType = TGSI_PROCESSOR_VERTEX;
+   uint ti, i;
+   struct pipe_shader_state shader;
+
+   tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0]));
+   if (!tokens)
+      return NULL;
+
+   /* shader header
+    */
+   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+
+   header = (struct tgsi_header *) &tokens[1];
+   *header = tgsi_build_header();
+
+   processor = (struct tgsi_processor *) &tokens[2];
+   *processor = tgsi_build_processor( procType, header );
+
+   ti = 3;
+
+   /* declare inputs */
+   for (i = 0; i < num_attribs; i++) {
+
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_INPUT;
+      /*
+      decl.Declaration.Semantic = 1;
+      decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION;
+      decl.Semantic.SemanticIndex = 0;
+      */
+      /* Attribute i lives in register i: the MOVs below read IN[i]. */
+      decl.u.DeclarationRange.First =
+      decl.u.DeclarationRange.Last = i;
+      ti += tgsi_build_full_declaration(&decl,
+                                        &tokens[ti],
+                                        header,
+                                        maxTokens - ti);
+   }
+
+   /* declare outputs */
+   for (i = 0; i < num_attribs; i++) {
+
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_OUTPUT;
+      decl.Declaration.Semantic = 1;
+      decl.Semantic.SemanticName = semantic_names[i];
+      decl.Semantic.SemanticIndex = semantic_indexes[i];
+      /* Attribute i lives in register i: the MOVs below write OUT[i]. */
+      decl.u.DeclarationRange.First =
+      decl.u.DeclarationRange.Last = i;
+      ti += tgsi_build_full_declaration(&decl,
+                                        &tokens[ti],
+                                        header,
+                                        maxTokens - ti);
+
+   }
+
+   /* emit MOV instructions */
+   for (i = 0; i < num_attribs; i++) {
+      /* MOVE out[i], in[i]; */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+      inst.Instruction.NumDstRegs = 1;
+      inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+      inst.FullDstRegisters[0].DstRegister.Index = i;
+      inst.Instruction.NumSrcRegs = 1;
+      inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+      inst.FullSrcRegisters[0].SrcRegister.Index = i;
+      ti += tgsi_build_full_instruction(&inst,
+                                        &tokens[ti],
+                                        header,
+                                        maxTokens - ti );
+   }
+
+   /* END instruction */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_END;
+   inst.Instruction.NumDstRegs = 0;
+   inst.Instruction.NumSrcRegs = 0;
+   ti += tgsi_build_full_instruction(&inst,
+                                     &tokens[ti],
+                                     header,
+                                     maxTokens - ti );
+
+#if 0 /*debug*/
+   tgsi_dump(tokens, 0);
+#endif
+
+   shader.tokens = tokens;
+   return pipe->create_vs_state(pipe, &shader);
+}
+
+
+
+
+/**
+ * Make simple fragment texture shader:
+ *  TEX OUT[0], IN[0], SAMP[0], 2D;
+ *  END;
+ *
+ * IN[0] is declared as GENERIC[0] with perspective interpolation and
+ * OUT[0] as COLOR[0].
+ *
+ * \param pipe  context used to create the shader state
+ * \return whatever pipe->create_fs_state() returns, or NULL if the token
+ *         buffer could not be allocated
+ *
+ * NOTE(review): the token buffer is not freed here; presumably the created
+ * shader keeps referencing it -- confirm ownership with the drivers.
+ */
+void *
+util_make_fragment_tex_shader(struct pipe_context *pipe)
+{
+   uint maxTokens = 100;
+   struct tgsi_token *tokens;
+   struct tgsi_header *header;
+   struct tgsi_processor *processor;
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+   const uint procType = TGSI_PROCESSOR_FRAGMENT;
+   uint ti;
+   struct pipe_shader_state shader;
+
+   tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0]));
+   if (!tokens)
+      return NULL;
+
+   /* shader header
+    */
+   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+
+   header = (struct tgsi_header *) &tokens[1];
+   *header = tgsi_build_header();
+
+   processor = (struct tgsi_processor *) &tokens[2];
+   *processor = tgsi_build_processor( procType, header );
+
+   ti = 3;
+
+   /* declare TEX[0] input */
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_INPUT;
+   decl.Declaration.Semantic = 1;
+   decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+   decl.Semantic.SemanticIndex = 0;
+   /* XXX this could be linear... */
+   decl.Declaration.Interpolate = 1;
+   decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+   decl.u.DeclarationRange.First =
+   decl.u.DeclarationRange.Last = 0;
+   ti += tgsi_build_full_declaration(&decl,
+                                     &tokens[ti],
+                                     header,
+                                     maxTokens - ti);
+
+   /* declare color[0] output */
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_OUTPUT;
+   decl.Declaration.Semantic = 1;
+   decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR;
+   decl.Semantic.SemanticIndex = 0;
+   decl.u.DeclarationRange.First =
+   decl.u.DeclarationRange.Last = 0;
+   ti += tgsi_build_full_declaration(&decl,
+                                     &tokens[ti],
+                                     header,
+                                     maxTokens - ti);
+
+   /* declare sampler */
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_SAMPLER;
+   decl.u.DeclarationRange.First =
+   decl.u.DeclarationRange.Last = 0;
+   ti += tgsi_build_full_declaration(&decl,
+                                     &tokens[ti],
+                                     header,
+                                     maxTokens - ti);
+
+   /* TEX instruction */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+   inst.Instruction.NumDstRegs = 1;
+   inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+   inst.FullDstRegisters[0].DstRegister.Index = 0;
+   inst.Instruction.NumSrcRegs = 2;
+   inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+   inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+   inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+   inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+   inst.FullSrcRegisters[1].SrcRegister.Index = 0;
+   ti += tgsi_build_full_instruction(&inst,
+                                     &tokens[ti],
+                                     header,
+                                     maxTokens - ti );
+
+   /* END instruction */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_END;
+   inst.Instruction.NumDstRegs = 0;
+   inst.Instruction.NumSrcRegs = 0;
+   ti += tgsi_build_full_instruction(&inst,
+                                     &tokens[ti],
+                                     header,
+                                     maxTokens - ti );
+
+#if 0 /*debug*/
+   tgsi_dump(tokens, 0);
+#endif
+
+   shader.tokens = tokens;
+   return pipe->create_fs_state(pipe, &shader);
+}
+
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h
new file mode 100644
index 0000000000..3ef4f28801
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_simple_shaders.h
@@ -0,0 +1,52 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef U_SIMPLE_SHADERS_H
+#define U_SIMPLE_SHADERS_H
+
+
+#include "pipe/p_compiler.h"
+
+
+struct pipe_context;
+
+
+/**
+ * Build a vertex shader that copies each of \p num_attribs inputs to the
+ * matching output and create it with pipe->create_vs_state().
+ * \p semantic_names and \p semantic_indexes give the semantic of each
+ * output and must hold \p num_attribs entries.
+ */
+extern void *
+util_make_vertex_passthrough_shader(struct pipe_context *pipe,
+ uint num_attribs,
+ const uint *semantic_names,
+ const uint *semantic_indexes);
+
+
+/**
+ * Build the fragment shader "TEX OUT[0], IN[0], SAMP[0], 2D; END;" and
+ * create it with pipe->create_fs_state().
+ */
+extern void *
+util_make_fragment_tex_shader(struct pipe_context *pipe);
+
+
+#endif
+
+
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index 9a4004535e..fe93fd8e1a 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -105,6 +105,16 @@
 /**
+ * Payload of the CELL_CMD_STATE_DEPTH_STENCIL state command: describes the
+ * generated depth / stencil / alpha test code.  base and size are both 0
+ * when no depth/stencil state is bound.
+ */
+struct cell_command_depth_stencil_alpha_test {
+ uint64_t base; /**< Effective address of code start. */
+ unsigned size; /**< Size in bytes of test code. */
+ unsigned read_depth; /**< Flag: should depth be read? */
+ unsigned read_stencil; /**< Flag: should stencil be read? */
+};
+
+
+/**
* Tell SPUs about the framebuffer size, location
*/
struct cell_command_framebuffer
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
index d38fa6ce07..0389a9554c 100644
--- a/src/gallium/drivers/cell/ppu/Makefile
+++ b/src/gallium/drivers/cell/ppu/Makefile
@@ -27,6 +27,7 @@ SOURCES = \
cell_flush.c \
cell_state_derived.c \
cell_state_emit.c \
+ cell_state_per_fragment.c \
cell_state_shader.c \
cell_pipe_state.c \
cell_screen.c \
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index b221424323..9e79db0ace 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -57,16 +57,37 @@ struct cell_fragment_shader_state
};
+/**
+ * Cell driver blend state object: the Gallium blend state followed by
+ * storage for generated SPE code.
+ */
+struct cell_blend_state {
+ /* Must stay the first member: cell_create_blend_state() copies the
+ * pipe_blend_state into the start of this struct.
+ */
+ struct pipe_blend_state base;
+
+ /**
+ * Generated code to perform alpha blending
+ */
+ struct spe_function code;
+};
+
+
+/**
+ * Cell driver depth/stencil/alpha state object: the Gallium state followed
+ * by storage for generated SPE code.
+ */
+struct cell_depth_stencil_alpha_state {
+ /* Must stay the first member: cell_create_depth_stencil_alpha_state()
+ * copies the pipe state into the start of this struct.
+ */
+ struct pipe_depth_stencil_alpha_state base;
+
+ /**
+ * Generated code to perform alpha, stencil, and depth testing on the SPE
+ */
+ struct spe_function code;
+
+};
+
+
struct cell_context
{
struct pipe_context pipe;
struct cell_winsys *winsys;
- const struct pipe_blend_state *blend;
+ const struct cell_blend_state *blend;
const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
uint num_samplers;
- const struct pipe_depth_stencil_alpha_state *depth_stencil;
+ const struct cell_depth_stencil_alpha_state *depth_stencil;
const struct pipe_rasterizer_state *rasterizer;
const struct cell_vertex_shader_state *vs;
const struct cell_fragment_shader_state *fs;
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index 025ed3bbbf..c880760e4b 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -36,6 +36,7 @@
#include "cell_context.h"
#include "cell_state.h"
#include "cell_texture.h"
+#include "cell_state_per_fragment.h"
@@ -43,7 +44,12 @@ static void *
cell_create_blend_state(struct pipe_context *pipe,
const struct pipe_blend_state *blend)
{
- return mem_dup(blend, sizeof(*blend));
+ struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state));
+
+ (void) memcpy(cb, blend, sizeof(*blend));
+ cb->code.store = NULL;
+
+ return cb;
}
@@ -54,7 +60,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend)
draw_flush(cell->draw);
- cell->blend = (const struct pipe_blend_state *)blend;
+ cell->blend = (const struct cell_blend_state *)blend;
cell->dirty |= CELL_NEW_BLEND;
}
@@ -63,7 +69,10 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend)
static void
cell_delete_blend_state(struct pipe_context *pipe, void *blend)
{
- FREE(blend);
+ struct cell_blend_state *cb = (struct cell_blend_state *) blend;
+
+ spe_release_func(& cb->code);
+ FREE(cb);
}
@@ -87,7 +96,13 @@ static void *
cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *depth_stencil)
{
- return mem_dup(depth_stencil, sizeof(*depth_stencil));
+ struct cell_depth_stencil_alpha_state *cdsa =
+ MALLOC(sizeof(struct cell_depth_stencil_alpha_state));
+
+ (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil));
+ cdsa->code.store = NULL;
+
+ return cdsa;
}
@@ -96,12 +111,16 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
void *depth_stencil)
{
struct cell_context *cell = cell_context(pipe);
+ struct cell_depth_stencil_alpha_state *cdsa =
+ (struct cell_depth_stencil_alpha_state *) depth_stencil;
draw_flush(cell->draw);
- cell->depth_stencil
- = (const struct pipe_depth_stencil_alpha_state *) depth_stencil;
+ if ((cdsa != NULL) && (cdsa->code.store == NULL)) {
+ cell_generate_depth_stencil_test(cdsa);
+ }
+ cell->depth_stencil = cdsa;
cell->dirty |= CELL_NEW_DEPTH_STENCIL;
}
@@ -109,7 +128,11 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
static void
cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth)
{
- FREE(depth);
+ struct cell_depth_stencil_alpha_state *cdsa =
+ (struct cell_depth_stencil_alpha_state *) depth;
+
+ spe_release_func(& cdsa->code);
+ FREE(cdsa);
}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 670eb26bdd..4d589bcdbf 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -71,9 +71,24 @@ cell_emit_state(struct cell_context *cell)
}
if (cell->dirty & CELL_NEW_DEPTH_STENCIL) {
- emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL,
- cell->depth_stencil,
- sizeof(struct pipe_depth_stencil_alpha_state));
+ struct cell_command_depth_stencil_alpha_test dsat;
+
+
+ if (cell->depth_stencil != NULL) {
+ dsat.base = (intptr_t) cell->depth_stencil->code.store;
+ dsat.size = (char *) cell->depth_stencil->code.csr
+ - (char *) cell->depth_stencil->code.store;
+ dsat.read_depth = TRUE;
+ dsat.read_stencil = FALSE;
+ } else {
+ dsat.base = 0;
+ dsat.size = 0;
+ dsat.read_depth = FALSE;
+ dsat.read_stencil = FALSE;
+ }
+
+ emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat,
+ sizeof(dsat));
}
if (cell->dirty & CELL_NEW_SAMPLER) {
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
new file mode 100644
index 0000000000..9c47968459
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
@@ -0,0 +1,1075 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file
+ * Generate code to perform all per-fragment operations.
+ *
+ * Code generated by these functions perform both alpha, depth, and stencil
+ * testing as well as alpha blending.
+ *
+ * \note
+ * Occlusion query is not supported, but this is the right place to add that
+ * support.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "cell_context.h"
+
+#include "rtasm/rtasm_ppc_spe.h"
+
+
+/**
+ * Generate code to perform alpha testing.
+ *
+ * The code generated by this function uses the register specified by
+ * \c mask as both an input and an output: fragments that fail the alpha
+ * test are removed from the mask.  A fragment passes when
+ * "alpha <func> reference" holds.
+ *
+ * \param dsa     Current alpha-test state
+ * \param f       Function to which code should be appended
+ * \param mask    Index of register containing active fragment mask
+ * \param alphas  Index of register containing per-fragment alpha values
+ *
+ * \note Emits a maximum of 6 instructions.
+ */
+static void
+emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
+                struct spe_function *f, int mask, int alphas)
+{
+   /* If the alpha function is either NEVER or ALWAYS, there is no need to
+    * load the reference value into a register.  ALWAYS is a fairly common
+    * case, and this optimization saves 2 instructions.
+    */
+   if (dsa->alpha.enabled
+       && (dsa->alpha.func != PIPE_FUNC_NEVER)
+       && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
+      int ref = spe_allocate_available_register(f);
+      int tmp_a = spe_allocate_available_register(f);
+      int tmp_b = spe_allocate_available_register(f);
+      union {
+         float f;
+         unsigned u;
+      } ref_val;
+      boolean complement = FALSE;
+
+      ref_val.f = dsa->alpha.ref;
+
+      /* Build the 32-bit reference constant from two 16-bit immediates:
+       * ilhu sets the upper halfword (clearing the lower one) and iohl ORs
+       * in the lower halfword.  An il / ilh pair cannot be used for this
+       * because ilh overwrites both halfwords of the word.
+       */
+      spe_ilhu(f, ref, ref_val.u >> 16);
+      spe_iohl(f, ref, ref_val.u & 0x0000ffff);
+
+      /* The incoming alpha is the left-hand operand of every comparison,
+       * matching the operand order used for the depth test.
+       */
+      switch (dsa->alpha.func) {
+      case PIPE_FUNC_NOTEQUAL:
+         complement = TRUE;
+         /* FALLTHROUGH */
+
+      case PIPE_FUNC_EQUAL:
+         spe_fceq(f, tmp_a, alphas, ref);
+         break;
+
+      case PIPE_FUNC_LEQUAL:
+         complement = TRUE;
+         /* FALLTHROUGH */
+
+      case PIPE_FUNC_GREATER:
+         spe_fcgt(f, tmp_a, alphas, ref);
+         break;
+
+      case PIPE_FUNC_LESS:
+         complement = TRUE;
+         /* FALLTHROUGH */
+
+      case PIPE_FUNC_GEQUAL:
+         spe_fcgt(f, tmp_a, alphas, ref);
+         spe_fceq(f, tmp_b, alphas, ref);
+         spe_or(f, tmp_a, tmp_b, tmp_a);
+         break;
+
+      case PIPE_FUNC_ALWAYS:
+      case PIPE_FUNC_NEVER:
+      default:
+         assert(0);
+         break;
+      }
+
+      /* tmp_a holds the pass mask, or its inverse when 'complement' is set. */
+      if (complement) {
+         spe_andc(f, mask, mask, tmp_a);
+      } else {
+         spe_and(f, mask, mask, tmp_a);
+      }
+
+      spe_release_register(f, ref);
+      spe_release_register(f, tmp_a);
+      spe_release_register(f, tmp_b);
+   } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
+      /* Nothing ever passes: clear the whole mask. */
+      spe_il(f, mask, 0);
+   }
+}
+
+
+/**
+ * Generate code to compare per-fragment depth values against the values
+ * read from the depth buffer.
+ *
+ * \param dsa         Current depth-test state
+ * \param f           Function to which code should be appended
+ * \param mask        Index of register to contain depth-pass mask
+ * \param stored      Index of register containing values from depth buffer
+ * \param calculated  Index of register containing per-fragment depth values
+ *
+ * \return
+ * \c FALSE if the register \c mask holds the actual depth-pass mask,
+ * \c TRUE if it holds the complement of the actual mask.
+ *
+ * \note Emits a maximum of 3 instructions.
+ */
+static boolean
+emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
+                struct spe_function *f, int mask, int stored, int calculated)
+{
+   /* A disabled depth test behaves like ALWAYS. */
+   const unsigned func = dsa->depth.enabled
+      ? dsa->depth.func : PIPE_FUNC_ALWAYS;
+   const int eq = spe_allocate_available_register(f);
+
+   /* NOTEQUAL, LEQUAL and LESS are produced as the inverse of EQUAL,
+    * GREATER and GEQUAL respectively; the caller is told to flip the mask.
+    */
+   const boolean inverted = ((func == PIPE_FUNC_NOTEQUAL)
+                             || (func == PIPE_FUNC_LEQUAL)
+                             || (func == PIPE_FUNC_LESS)) ? TRUE : FALSE;
+
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      spe_il(f, mask, 0);
+      break;
+
+   case PIPE_FUNC_ALWAYS:
+      spe_il(f, mask, ~0);
+      break;
+
+   case PIPE_FUNC_EQUAL:
+   case PIPE_FUNC_NOTEQUAL:
+      spe_ceq(f, mask, calculated, stored);
+      break;
+
+   case PIPE_FUNC_GREATER:
+   case PIPE_FUNC_LEQUAL:
+      spe_clgt(f, mask, calculated, stored);
+      break;
+
+   case PIPE_FUNC_GEQUAL:
+   case PIPE_FUNC_LESS:
+      /* greater-or-equal == greater OR equal */
+      spe_clgt(f, mask, calculated, stored);
+      spe_ceq(f, eq, calculated, stored);
+      spe_or(f, mask, mask, eq);
+      break;
+
+   default:
+      assert(0);
+      break;
+   }
+
+   spe_release_register(f, eq);
+   return inverted;
+}
+
+
+/**
+ * Generate code that applies a stencil operation to selected fragments.
+ *
+ * \param f     Function to which code should be appended
+ * \param out   Index of register that receives the updated stencil values
+ * \param in    Index of register containing the current stencil values
+ * \param mask  Index of register selecting the fragments that take the new
+ *              value; the others keep the value from \c in
+ * \param op    Stencil operation (PIPE_STENCIL_OP_x)
+ * \param ref   Value stored by PIPE_STENCIL_OP_REPLACE
+ *
+ * \note Emits a maximum of 5 instructions.
+ *
+ * \warning
+ * Since \c out and \c in might be the same register, this routine cannot
+ * generate code that uses \c out as a temporary.
+ */
+static void
+emit_stencil_op(struct spe_function *f,
+                int out, int in, int mask, unsigned op, unsigned ref)
+{
+   const int clamp = spe_allocate_available_register(f);
+   const int clamp_mask = spe_allocate_available_register(f);
+   const int result = spe_allocate_available_register(f);
+
+   switch(op) {
+   case PIPE_STENCIL_OP_KEEP:
+      /* Callers are not expected to ask for code for KEEP.  If it happens
+       * anyway in a build without assertions, pass the value through
+       * unchanged rather than falling into the ZERO case.
+       */
+      assert(0);
+      spe_ai(f, result, in, 0);
+      break;
+   case PIPE_STENCIL_OP_ZERO:
+      spe_il(f, result, 0);
+      break;
+   case PIPE_STENCIL_OP_REPLACE:
+      spe_il(f, result, ref);
+      break;
+   case PIPE_STENCIL_OP_INCR:
+      /* Saturate at 0xff. */
+      spe_il(f, clamp, 0x0ff);
+      spe_ai(f, result, in, 1);
+      spe_clgti(f, clamp_mask, result, 0x0ff);
+      spe_selb(f, result, result, clamp, clamp_mask);
+      break;
+   case PIPE_STENCIL_OP_DECR:
+      spe_il(f, clamp, 0);
+      spe_ai(f, result, in, -1);
+
+      /* If "(s-1) < 0" in signed arithmetic, then "(s-1) > MAX" in unsigned
+       * arithmetic.
+       */
+      spe_clgti(f, clamp_mask, result, 0x0ff);
+      spe_selb(f, result, result, clamp, clamp_mask);
+      break;
+   case PIPE_STENCIL_OP_INCR_WRAP:
+      spe_ai(f, result, in, 1);
+      break;
+   case PIPE_STENCIL_OP_DECR_WRAP:
+      spe_ai(f, result, in, -1);
+      break;
+   case PIPE_STENCIL_OP_INVERT:
+      /* NOR of a value with itself is its bitwise inverse. */
+      spe_nor(f, result, in, in);
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* Only the fragments selected by mask receive the new value. */
+   spe_selb(f, out, in, result, mask);
+
+   spe_release_register(f, result);
+   spe_release_register(f, clamp_mask);
+   spe_release_register(f, clamp);
+}
+
+
+/**
+ * Emit code for the stencil test and the stencil-value update of one face.
+ *
+ * \param dsa              Depth / stencil test state
+ * \param face             0 for front face, 1 for back face
+ * \param f                Function to append instructions to
+ * \param mask             Register containing mask of fragments passing the
+ *                         alpha test
+ * \param depth_mask       Register containing mask of fragments passing the
+ *                         depth test
+ * \param depth_complement Is \c depth_mask the complement of the actual mask?
+ * \param stencil          Register containing values from stencil buffer
+ * \param depth_pass       Register to store mask of fragments passing stencil
+ *                         test and depth test
+ *
+ * \return
+ * The register holding the updated stencil values.  This is \c stencil
+ * itself when no update code was emitted; otherwise it is a newly allocated
+ * register that the caller is responsible for releasing.
+ *
+ * \note
+ * Emits a maximum of 10 + (3 * 5) = 25 instructions.
+ */
+static int
+emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
+                  unsigned face,
+                  struct spe_function *f,
+                  int mask,
+                  int depth_mask,
+                  boolean depth_complement,
+                  int stencil,
+                  int depth_pass)
+{
+   int stencil_fail = spe_allocate_available_register(f);
+   int depth_fail = spe_allocate_available_register(f);
+   int stencil_mask = spe_allocate_available_register(f);
+   int stencil_pass = spe_allocate_available_register(f);
+   int face_stencil = spe_allocate_available_register(f);
+   int stencil_src = stencil;
+   const unsigned ref = (dsa->stencil[face].ref_value
+                         & dsa->stencil[face].value_mask);
+   boolean complement = FALSE;
+   int stored;
+   int tmp = spe_allocate_available_register(f);
+
+
+   /* The stored value only needs to be masked when it is actually compared
+    * and the value mask does not cover all 8 stencil bits.
+    */
+   if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
+       && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
+       && (dsa->stencil[face].value_mask != 0x0ff)) {
+      stored = spe_allocate_available_register(f);
+      spe_andi(f, stored, stencil, dsa->stencil[face].value_mask);
+   } else {
+      stored = stencil;
+   }
+
+
+   /* The stencil function compares "ref FUNC stored" (reference value on the
+    * left).  The SPE compare-immediate instructions put the register on the
+    * left, so each case below is written in terms of "stored > ref" and
+    * "stored == ref", complemented where necessary.
+    */
+   switch (dsa->stencil[face].func) {
+   case PIPE_FUNC_NEVER:
+      spe_il(f, stencil_mask, 0);
+      break;
+
+   case PIPE_FUNC_NOTEQUAL:
+      complement = TRUE;
+      /* FALLTHROUGH */
+   case PIPE_FUNC_EQUAL:
+      spe_ceqi(f, stencil_mask, stored, ref);
+      break;
+
+   case PIPE_FUNC_GEQUAL:
+      /* ref >= stored  <=>  !(stored > ref) */
+      complement = TRUE;
+      /* FALLTHROUGH */
+   case PIPE_FUNC_LESS:
+      /* ref < stored  <=>  stored > ref */
+      spe_clgti(f, stencil_mask, stored, ref);
+      break;
+
+   case PIPE_FUNC_GREATER:
+      /* ref > stored  <=>  !(stored >= ref) */
+      complement = TRUE;
+      /* FALLTHROUGH */
+   case PIPE_FUNC_LEQUAL:
+      /* ref <= stored  <=>  (stored > ref) || (stored == ref) */
+      spe_clgti(f, stencil_mask, stored, ref);
+      spe_ceqi(f, tmp, stored, ref);
+      spe_or(f, stencil_mask, stencil_mask, tmp);
+      break;
+
+   case PIPE_FUNC_ALWAYS:
+      /* See comment below. */
+      break;
+
+   default:
+      assert(0);
+      break;
+   }
+
+   if (stored != stencil) {
+      spe_release_register(f, stored);
+   }
+   spe_release_register(f, tmp);
+
+
+   /* ALWAYS is a very common stencil-test, so some effort is applied to
+    * optimize that case.  The stencil-pass mask is the same as the input
+    * fragment mask.  This makes the stencil-test (above) a no-op, and the
+    * input fragment mask can be "renamed" the stencil-pass mask.
+    */
+   if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
+      spe_release_register(f, stencil_pass);
+      stencil_pass = mask;
+   } else {
+      if (complement) {
+         spe_andc(f, stencil_pass, mask, stencil_mask);
+      } else {
+         spe_and(f, stencil_pass, mask, stencil_mask);
+      }
+   }
+
+   if (depth_complement) {
+      spe_andc(f, depth_pass, stencil_pass, depth_mask);
+   } else {
+      spe_and(f, depth_pass, stencil_pass, depth_mask);
+   }
+
+
+   /* Conditionally emit code to update the stencil value under various
+    * conditions.  Note that there is no need to generate code under the
+    * following circumstances:
+    *
+    * - Stencil write mask is zero.
+    * - For stencil-fail if the stencil test is ALWAYS
+    * - For depth-fail if the stencil test is NEVER
+    * - For depth-pass if the stencil test is NEVER
+    * - Any of the 3 conditions if the operation is KEEP
+    */
+   if (dsa->stencil[face].write_mask != 0) {
+      if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
+          && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
+         if (complement) {
+            spe_and(f, stencil_fail, mask, stencil_mask);
+         } else {
+            spe_andc(f, stencil_fail, mask, stencil_mask);
+         }
+
+         emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
+                         dsa->stencil[face].fail_op,
+                         dsa->stencil[face].ref_value);
+
+         stencil_src = face_stencil;
+      }
+
+      if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
+          && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
+         if (depth_complement) {
+            spe_and(f, depth_fail, stencil_pass, depth_mask);
+         } else {
+            spe_andc(f, depth_fail, stencil_pass, depth_mask);
+         }
+
+         emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
+                         dsa->stencil[face].zfail_op,
+                         dsa->stencil[face].ref_value);
+         stencil_src = face_stencil;
+      }
+
+      if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
+          && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
+         emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
+                         dsa->stencil[face].zpass_op,
+                         dsa->stencil[face].ref_value);
+         stencil_src = face_stencil;
+      }
+   }
+
+   spe_release_register(f, stencil_fail);
+   spe_release_register(f, depth_fail);
+   spe_release_register(f, stencil_mask);
+   if (stencil_pass != mask) {
+      spe_release_register(f, stencil_pass);
+   }
+
+   /* If all of the stencil operations were KEEP or the stencil write mask was
+    * zero, "stencil_src" will still be set to "stencil".  In this case
+    * release the "face_stencil" register.  Otherwise apply the stencil write
+    * mask to select bits from the calculated stencil value and the previous
+    * stencil value.
+    */
+   if (stencil_src == stencil) {
+      spe_release_register(f, face_stencil);
+   } else if (dsa->stencil[face].write_mask != 0x0ff) {
+      const int write_mask_reg = spe_allocate_available_register(f);
+
+      spe_il(f, write_mask_reg, dsa->stencil[face].write_mask);
+      spe_selb(f, stencil_src, stencil, stencil_src, write_mask_reg);
+
+      spe_release_register(f, write_mask_reg);
+   }
+
+   return stencil_src;
+}
+
+
+/**
+ * Generate the SPE code for the alpha, depth and stencil tests.
+ *
+ * The instructions are appended to \c cdsa->code, using the test state in
+ * \c cdsa->base.  The generated function takes the fragment mask, stored
+ * depth, stored stencil, fragment depth, fragment alpha and facing in
+ * registers 3 through 8, and leaves the resulting mask, depth and stencil in
+ * registers 3, 4 and 5.
+ *
+ * \param cdsa  Depth / stencil / alpha state object that also owns the
+ *              generated code
+ */
+void
+cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
+{
+   struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
+   struct spe_function *const f = &cdsa->code;
+
+   /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
+    * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions.  Round
+    * up to 64 to make it a happy power-of-two.
+    */
+   spe_init_func(f, 4 * 64);
+
+
+   /* Allocate registers for the function's input parameters.  Cleverly (and
+    * clever code is usually dangerous, but I couldn't resist) the generated
+    * function returns a structure.  Returned structures start with register
+    * 3, and the structure fields are ordered to match up exactly with the
+    * input parameters.
+    */
+   int mask = spe_allocate_register(f, 3);
+   int depth = spe_allocate_register(f, 4);
+   int stencil = spe_allocate_register(f, 5);
+   int zvals = spe_allocate_register(f, 6);
+   int frag_a = spe_allocate_register(f, 7);
+   int facing = spe_allocate_register(f, 8);
+
+   int depth_mask = spe_allocate_available_register(f);
+
+   boolean depth_complement;
+
+
+   emit_alpha_test(dsa, f, mask, frag_a);
+
+   depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
+
+   if (dsa->stencil[0].enabled) {
+      const int front_depth_pass = spe_allocate_available_register(f);
+      int front_stencil = emit_stencil_test(dsa, 0, f, mask,
+                                            depth_mask, depth_complement,
+                                            stencil, front_depth_pass);
+
+      if (dsa->stencil[1].enabled) {
+         const int back_depth_pass = spe_allocate_available_register(f);
+         int back_stencil = emit_stencil_test(dsa, 1, f, mask,
+                                              depth_mask, depth_complement,
+                                              stencil, back_depth_pass);
+
+         /* If the front facing stencil value and the back facing stencil
+          * value are stored in the same register, there is no need to select
+          * a value based on the facing.  This can happen if the stencil value
+          * was not modified due to the write masks being zero, the stencil
+          * operations being KEEP, etc.
+          */
+         if (front_stencil != back_stencil) {
+            spe_selb(f, stencil, back_stencil, front_stencil, facing);
+         }
+
+         if (back_stencil != stencil) {
+            spe_release_register(f, back_stencil);
+         }
+
+         if (front_stencil != stencil) {
+            spe_release_register(f, front_stencil);
+         }
+
+         spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
+
+         spe_release_register(f, back_depth_pass);
+      } else {
+         /* Only the front face is tested: copy its results into the output
+          * registers ("or" of a register with itself is a move).
+          */
+         if (front_stencil != stencil) {
+            spe_or(f, stencil, front_stencil, front_stencil);
+            spe_release_register(f, front_stencil);
+         }
+         spe_or(f, mask, front_depth_pass, front_depth_pass);
+      }
+
+      spe_release_register(f, front_depth_pass);
+   } else if (dsa->depth.enabled) {
+      if (depth_complement) {
+         spe_andc(f, mask, mask, depth_mask);
+      } else {
+         spe_and(f, mask, mask, depth_mask);
+      }
+   }
+
+   /* The depth-test mask is not needed past this point. */
+   spe_release_register(f, depth_mask);
+
+   if (dsa->depth.writemask) {
+      spe_selb(f, depth, depth, zvals, mask);
+   }
+
+   spe_bi(f, 0, 0, 0);
+
+
+#if 0
+   {
+      const uint32_t *p = f->store;
+      unsigned i;
+
+      printf("# alpha (%sabled)\n",
+             (dsa->alpha.enabled) ? "en" : "dis");
+      printf("# func: %u\n", dsa->alpha.func);
+      printf("# ref: %.2f\n", dsa->alpha.ref);
+
+      printf("# depth (%sabled)\n",
+             (dsa->depth.enabled) ? "en" : "dis");
+      printf("# func: %u\n", dsa->depth.func);
+
+      for (i = 0; i < 2; i++) {
+         printf("# %s stencil (%sabled)\n",
+                (i == 0) ? "front" : "back",
+                (dsa->stencil[i].enabled) ? "en" : "dis");
+
+         printf("# func: %u\n", dsa->stencil[i].func);
+         printf("# op (sf, zf, zp): %u %u %u\n",
+                dsa->stencil[i].fail_op,
+                dsa->stencil[i].zfail_op,
+                dsa->stencil[i].zpass_op);
+         printf("# ref value / value mask / write mask: %02x %02x %02x\n",
+                dsa->stencil[i].ref_value,
+                dsa->stencil[i].value_mask,
+                dsa->stencil[i].write_mask);
+      }
+
+      /* Each instruction is a 32-bit word, so print all 8 hex digits. */
+      printf("\t.text\n");
+      for (/* empty */; p < f->csr; p++) {
+         printf("\t.long\t0x%08x\n", *p);
+      }
+      fflush(stdout);
+   }
+#endif
+}
+
+
+/**
+ * Emit code that produces the blend factor for the alpha channel.
+ *
+ * \param f            Function to append instructions to
+ * \param factor       One of the \c PIPE_BLENDFACTOR_* values
+ * \param const_alpha  Alpha component of the constant blend color
+ * \param src_alpha    Register holding the fragment (source) alpha
+ * \param dst_alpha    Register holding the pixel (destination) alpha
+ *
+ * \return
+ * The register holding the factor, or -1 when no register is needed or
+ * available (ONE, ZERO, SRC_ALPHA_SATURATE and unsupported factors).  For
+ * the destination-alpha factor the returned register is \c dst_alpha itself
+ * rather than a newly allocated one.
+ *
+ * \note Emits a maximum of 3 instructions
+ */
+static int
+emit_alpha_factor_calculation(struct spe_function *f,
+                              unsigned factor, float const_alpha,
+                              int src_alpha, int dst_alpha)
+{
+   union {
+      float f;
+      unsigned u;
+   } alpha;
+   int factor_reg;
+   int tmp;
+
+
+   /* For the alpha channel the "color" factors are equivalent to the
+    * corresponding "alpha" factors, so they share the same cases.
+    */
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      factor_reg = -1;
+      break;
+
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      factor_reg = spe_allocate_available_register(f);
+
+      spe_or(f, factor_reg, src_alpha, src_alpha);
+      break;
+
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      factor_reg = dst_alpha;
+      break;
+
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      factor_reg = -1;
+      break;
+
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      const_alpha = 1.0 - const_alpha;
+      /* FALLTHROUGH */
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      /* Capture the bit pattern only after the optional inversion above. */
+      alpha.f = const_alpha;
+      factor_reg = spe_allocate_available_register(f);
+
+      /* Build the 32-bit constant from two 16-bit immediates: load the
+       * upper halfword (clearing the lower), then OR in the lower halfword.
+       */
+      spe_ilhu(f, factor_reg, alpha.u >> 16);
+      spe_iohl(f, factor_reg, alpha.u & 0x0ffff);
+      break;
+
+   case PIPE_BLENDFACTOR_ZERO:
+      factor_reg = -1;
+      break;
+
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      tmp = spe_allocate_available_register(f);
+      factor_reg = spe_allocate_available_register(f);
+
+      /* tmp = 1.0f; factor = 1.0f - As */
+      spe_il(f, tmp, 1);
+      spe_cuflt(f, tmp, tmp, 0);
+      spe_fs(f, factor_reg, tmp, src_alpha);
+
+      spe_release_register(f, tmp);
+      break;
+
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      tmp = spe_allocate_available_register(f);
+      factor_reg = spe_allocate_available_register(f);
+
+      /* tmp = 1.0f; factor = 1.0f - Ad */
+      spe_il(f, tmp, 1);
+      spe_cuflt(f, tmp, tmp, 0);
+      spe_fs(f, factor_reg, tmp, dst_alpha);
+
+      spe_release_register(f, tmp);
+      break;
+
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+   default:
+      assert(0);
+      factor_reg = -1;
+      break;
+   }
+
+   return factor_reg;
+}
+
+
+/**
+ * Emit code that produces the blend factors for the red, green and blue
+ * channels.
+ *
+ * \param f            Function to append instructions to
+ * \param sF           One of the \c PIPE_BLENDFACTOR_* values
+ * \param mask         Color write mask; bit \c i enables channel \c i
+ * \param blend_color  Constant blend color
+ * \param src          Registers holding the fragment (source) RGBA values
+ * \param dst          Registers holding the pixel (destination) RGBA values
+ * \param factor       Receives the register holding the factor for each
+ *                     channel, or -1 where no register was assigned.
+ *                     Element 3 is always set to -1.
+ *
+ * \note Emits a maximum of 6 instructions
+ */
+static void
+emit_color_factor_calculation(struct spe_function *f,
+                              unsigned sF, unsigned mask,
+                              const struct pipe_blend_color *blend_color,
+                              const int *src,
+                              const int *dst,
+                              int *factor)
+{
+   union {
+      float f[4];
+      unsigned u[4];
+   } color;
+   int tmp;
+   unsigned i;
+
+
+   color.f[0] = blend_color->color[0];
+   color.f[1] = blend_color->color[1];
+   color.f[2] = blend_color->color[2];
+   color.f[3] = blend_color->color[3];
+
+   factor[0] = -1;
+   factor[1] = -1;
+   factor[2] = -1;
+   factor[3] = -1;
+
+   switch (sF) {
+   case PIPE_BLENDFACTOR_ONE:
+      break;
+
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      for (i = 0; i < 3; ++i) {
+         if ((mask & (1U << i)) != 0) {
+            factor[i] = spe_allocate_available_register(f);
+            spe_or(f, factor[i], src[i], src[i]);
+         }
+      }
+      break;
+
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      factor[0] = spe_allocate_available_register(f);
+      factor[1] = factor[0];
+      factor[2] = factor[0];
+
+      spe_or(f, factor[0], src[3], src[3]);
+      break;
+
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      factor[0] = dst[3];
+      factor[1] = dst[3];
+      factor[2] = dst[3];
+      break;
+
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      factor[0] = dst[0];
+      factor[1] = dst[1];
+      factor[2] = dst[2];
+      break;
+
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      tmp = spe_allocate_available_register(f);
+      factor[0] = spe_allocate_available_register(f);
+      factor[1] = factor[0];
+      factor[2] = factor[0];
+
+      /* Alpha saturate means min(As, 1-Ad).
+       */
+      spe_il(f, tmp, 1);
+      spe_cuflt(f, tmp, tmp, 0);
+      spe_fs(f, tmp, tmp, dst[3]);
+
+      /* The compare sets the mask where (1-Ad) > As.  selb takes its third
+       * register operand where the mask is set, so As must be that operand
+       * for the result to be the smaller of the two values.
+       */
+      spe_fcgt(f, factor[0], tmp, src[3]);
+      spe_selb(f, factor[0], tmp, src[3], factor[0]);
+
+      spe_release_register(f, tmp);
+      break;
+
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      color.f[0] = 1.0 - color.f[0];
+      color.f[1] = 1.0 - color.f[1];
+      color.f[2] = 1.0 - color.f[2];
+      /* FALLTHROUGH */
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      for (i = 0; i < 3; i++) {
+         factor[i] = spe_allocate_available_register(f);
+
+         /* Upper halfword first (clears the lower), then OR in the lower
+          * halfword to form the full 32-bit float bit pattern.
+          */
+         spe_ilhu(f, factor[i], color.u[i] >> 16);
+         spe_iohl(f, factor[i], color.u[i] & 0x0ffff);
+      }
+      break;
+
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      color.f[3] = 1.0 - color.f[3];
+      /* FALLTHROUGH */
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      factor[0] = spe_allocate_available_register(f);
+      factor[1] = factor[0];
+      factor[2] = factor[0];
+
+      spe_ilhu(f, factor[0], color.u[3] >> 16);
+      spe_iohl(f, factor[0], color.u[3] & 0x0ffff);
+      break;
+
+   case PIPE_BLENDFACTOR_ZERO:
+      break;
+
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      tmp = spe_allocate_available_register(f);
+
+      /* tmp = 1.0f */
+      spe_il(f, tmp, 1);
+      spe_cuflt(f, tmp, tmp, 0);
+
+      for (i = 0; i < 3; ++i) {
+         if ((mask & (1U << i)) != 0) {
+            factor[i] = spe_allocate_available_register(f);
+            spe_fs(f, factor[i], tmp, src[i]);
+         }
+      }
+
+      spe_release_register(f, tmp);
+      break;
+
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      tmp = spe_allocate_available_register(f);
+      factor[0] = spe_allocate_available_register(f);
+      factor[1] = factor[0];
+      factor[2] = factor[0];
+
+      spe_il(f, tmp, 1);
+      spe_cuflt(f, tmp, tmp, 0);
+      spe_fs(f, factor[0], tmp, src[3]);
+
+      spe_release_register(f, tmp);
+      break;
+
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      tmp = spe_allocate_available_register(f);
+      factor[0] = spe_allocate_available_register(f);
+      factor[1] = factor[0];
+      factor[2] = factor[0];
+
+      spe_il(f, tmp, 1);
+      spe_cuflt(f, tmp, tmp, 0);
+      spe_fs(f, factor[0], tmp, dst[3]);
+
+      spe_release_register(f, tmp);
+      break;
+
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      tmp = spe_allocate_available_register(f);
+
+      /* tmp = 1.0f */
+      spe_il(f, tmp, 1);
+      spe_cuflt(f, tmp, tmp, 0);
+
+      for (i = 0; i < 3; ++i) {
+         if ((mask & (1U << i)) != 0) {
+            factor[i] = spe_allocate_available_register(f);
+            spe_fs(f, factor[i], tmp, dst[i]);
+         }
+      }
+
+      spe_release_register(f, tmp);
+      break;
+
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+   default:
+      assert(0);
+      break;
+   }
+}
+
+
+/**
+ * Emit code computing  out = (a * a_factor) - (b * b_factor).
+ *
+ * Factors of ZERO and ONE have no factor register (it is -1), so those
+ * cases are special-cased instead of multiplying.  \c out may be the same
+ * register as \c a or \c b; \c tmp must be distinct from all of them.
+ *
+ * \note Emits a maximum of 3 instructions.
+ */
+static void
+emit_blend_subtract(struct spe_function *f, int out, int tmp,
+                    int a, unsigned aF, int a_factor,
+                    int b, unsigned bF, int b_factor)
+{
+   if (aF == PIPE_BLENDFACTOR_ZERO) {
+      if (bF == PIPE_BLENDFACTOR_ZERO) {
+         spe_il(f, out, 0);
+      } else if (bF == PIPE_BLENDFACTOR_ONE) {
+         spe_il(f, tmp, 0);
+         spe_fs(f, out, tmp, b);
+      } else {
+         spe_fm(f, out, b, b_factor);
+         spe_il(f, tmp, 0);
+         spe_fs(f, out, tmp, out);
+      }
+   } else if (aF == PIPE_BLENDFACTOR_ONE) {
+      if (bF == PIPE_BLENDFACTOR_ZERO) {
+         if (out != a) {
+            spe_or(f, out, a, a);
+         }
+      } else if (bF == PIPE_BLENDFACTOR_ONE) {
+         spe_fs(f, out, a, b);
+      } else {
+         spe_fm(f, tmp, b, b_factor);
+         spe_fs(f, out, a, tmp);
+      }
+   } else {
+      if (bF == PIPE_BLENDFACTOR_ZERO) {
+         spe_fm(f, out, a, a_factor);
+      } else if (bF == PIPE_BLENDFACTOR_ONE) {
+         spe_fms(f, out, a, a_factor, b);
+      } else {
+         spe_fm(f, tmp, b, b_factor);
+         spe_fms(f, out, a, a_factor, tmp);
+      }
+   }
+}
+
+
+/**
+ * Emit code that applies the blend equation to a single channel.
+ *
+ * The result replaces the contents of \c src.
+ *
+ * \param f           Function to append instructions to
+ * \param func        One of the \c PIPE_BLEND_* equations
+ * \param sF          Source blend factor (\c PIPE_BLENDFACTOR_*)
+ * \param dF          Destination blend factor (\c PIPE_BLENDFACTOR_*)
+ * \param src         Register holding the fragment value; receives the result
+ * \param src_factor  Register holding the source factor; ignored when \c sF
+ *                    is ZERO or ONE
+ * \param dst         Register holding the pixel value
+ * \param dst_factor  Register holding the destination factor; ignored when
+ *                    \c dF is ZERO or ONE
+ *
+ * \note Emits a maximum of 3 instructions.
+ */
+static void
+emit_blend_calculation(struct spe_function *f,
+                       unsigned func, unsigned sF, unsigned dF,
+                       int src, int src_factor, int dst, int dst_factor)
+{
+   int tmp = spe_allocate_available_register(f);
+
+   switch (func) {
+   case PIPE_BLEND_ADD:
+      /* src = (src * sF) + (dst * dF) */
+      if (sF == PIPE_BLENDFACTOR_ZERO) {
+         if (dF == PIPE_BLENDFACTOR_ZERO) {
+            spe_il(f, src, 0);
+         } else if (dF == PIPE_BLENDFACTOR_ONE) {
+            spe_or(f, src, dst, dst);
+         } else {
+            spe_fm(f, src, dst, dst_factor);
+         }
+      } else if (sF == PIPE_BLENDFACTOR_ONE) {
+         if (dF == PIPE_BLENDFACTOR_ZERO) {
+            /* Do nothing. */
+         } else if (dF == PIPE_BLENDFACTOR_ONE) {
+            spe_fa(f, src, src, dst);
+         } else {
+            spe_fma(f, src, dst, dst_factor, src);
+         }
+      } else {
+         if (dF == PIPE_BLENDFACTOR_ZERO) {
+            spe_fm(f, src, src, src_factor);
+         } else if (dF == PIPE_BLENDFACTOR_ONE) {
+            spe_fma(f, src, src, src_factor, dst);
+         } else {
+            spe_fm(f, tmp, dst, dst_factor);
+            spe_fma(f, src, src, src_factor, tmp);
+         }
+      }
+      break;
+
+   case PIPE_BLEND_SUBTRACT:
+      /* src = (src * sF) - (dst * dF) */
+      emit_blend_subtract(f, src, tmp,
+                          src, sF, src_factor,
+                          dst, dF, dst_factor);
+      break;
+
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      /* src = (dst * dF) - (src * sF) */
+      emit_blend_subtract(f, src, tmp,
+                          dst, dF, dst_factor,
+                          src, sF, src_factor);
+      break;
+
+   case PIPE_BLEND_MIN:
+      /* tmp = (src > dst); take dst where src is larger. */
+      spe_fcgt(f, tmp, src, dst);
+      spe_selb(f, src, src, dst, tmp);
+      break;
+
+   case PIPE_BLEND_MAX:
+      /* tmp = (src > dst); keep src where it is larger, else take dst. */
+      spe_fcgt(f, tmp, src, dst);
+      spe_selb(f, src, dst, src, tmp);
+      break;
+
+   default:
+      assert(0);
+      break;
+   }
+
+   spe_release_register(f, tmp);
+}
+
+
+/**
+ * Does this blend factor have the same value for the RGB channels as it has
+ * for the alpha channel, so that a register computed for alpha can be reused
+ * for color?
+ */
+static boolean
+blend_factor_is_alpha_scalar(unsigned factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
+
+
+/**
+ * Generate code to perform alpha blending on the SPE
+ *
+ * The instructions are appended to \c cb->code, using the blend state in
+ * \c cb->base.  The generated function takes the fragment RGBA in registers
+ * 3 through 6, the pixel RGBA in registers 7 through 10 and the fragment
+ * mask in register 11, and leaves the blended RGBA in registers 3 through 6.
+ *
+ * \param cb           Blend state object that also owns the generated code
+ * \param blend_color  Constant blend color
+ */
+void
+cell_generate_alpha_blend(struct cell_blend_state *cb,
+                          const struct pipe_blend_color *blend_color)
+{
+   struct pipe_blend_state *const b = &cb->base;
+   struct spe_function *const f = &cb->code;
+
+   /* Loose upper bound on the generated code: 3 (source alpha factor)
+    * + 3 (destination alpha factor) + (3 * 5) (source color factor)
+    * + (3 * 5) (destination color factor) + (4 * 3) (blend equation)
+    * + 4 (fragment mask) + 1 (return) = 53 instructions.  Round up to 64 to
+    * make it a happy power-of-two.
+    */
+   spe_init_func(f, 4 * 64);
+
+
+   const int frag[4] = {
+      spe_allocate_register(f, 3),
+      spe_allocate_register(f, 4),
+      spe_allocate_register(f, 5),
+      spe_allocate_register(f, 6),
+   };
+   const int pixel[4] = {
+      spe_allocate_register(f, 7),
+      spe_allocate_register(f, 8),
+      spe_allocate_register(f, 9),
+      spe_allocate_register(f, 10),
+   };
+   const int mask = spe_allocate_register(f, 11);
+   unsigned func[4];
+   unsigned sF[4];
+   unsigned dF[4];
+   unsigned i;
+
+   /* -1 means "no factor register"; start there so nothing below ever reads
+    * an uninitialized entry.
+    */
+   int src_factor[4] = { -1, -1, -1, -1 };
+   int dst_factor[4] = { -1, -1, -1, -1 };
+   boolean have_alpha_factor = FALSE;
+
+
+   /* Does the selected blend mode make use of the source / destination
+    * color (RGB) blend factors?
+    */
+   boolean need_color_factor = b->blend_enable
+      && (b->rgb_func != PIPE_BLEND_MIN)
+      && (b->rgb_func != PIPE_BLEND_MAX);
+
+   /* Does the selected blend mode make use of the source / destination
+    * alpha blend factors?
+    */
+   boolean need_alpha_factor = b->blend_enable
+      && (b->alpha_func != PIPE_BLEND_MIN)
+      && (b->alpha_func != PIPE_BLEND_MAX);
+
+
+   sF[0] = b->rgb_src_factor;
+   sF[1] = sF[0];
+   sF[2] = sF[0];
+   sF[3] = (b->alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+      ? PIPE_BLENDFACTOR_ONE : b->alpha_src_factor;
+
+   dF[0] = b->rgb_dst_factor;
+   dF[1] = dF[0];
+   dF[2] = dF[0];
+   dF[3] = b->alpha_dst_factor;
+
+
+   /* If alpha writing is enabled and the alpha blend mode requires use of
+    * the alpha factor, calculate the alpha factor.
+    */
+   if (((b->colormask & 8) != 0) && need_alpha_factor) {
+      src_factor[3] = emit_alpha_factor_calculation(f, sF[3],
+                                                    blend_color->color[3],
+                                                    frag[3], pixel[3]);
+
+      /* If the alpha destination blend factor is the same as the alpha source
+       * blend factor, re-use the previously calculated value.
+       */
+      dst_factor[3] = (dF[3] == sF[3])
+         ? src_factor[3]
+         : emit_alpha_factor_calculation(f, dF[3],
+                                         blend_color->color[3],
+                                         frag[3], pixel[3]);
+      have_alpha_factor = TRUE;
+   }
+
+
+   if (need_color_factor) {
+      /* Re-use an already computed alpha factor for RGB only when it was
+       * really computed and the factor is identical for all four channels.
+       */
+      const boolean src_shares_alpha = have_alpha_factor
+         && blend_factor_is_alpha_scalar(sF[0]);
+      const boolean dst_shares_alpha = have_alpha_factor
+         && blend_factor_is_alpha_scalar(dF[0]);
+
+      if (src_shares_alpha && (sF[0] == sF[3])) {
+         src_factor[0] = src_factor[3];
+         src_factor[1] = src_factor[3];
+         src_factor[2] = src_factor[3];
+      } else if (src_shares_alpha && (sF[0] == dF[3])) {
+         src_factor[0] = dst_factor[3];
+         src_factor[1] = dst_factor[3];
+         src_factor[2] = dst_factor[3];
+      } else {
+         /* emit_color_factor_calculation() resets all four entries, so go
+          * through a scratch array to preserve the alpha entry.
+          */
+         int rgb_factor[4];
+
+         emit_color_factor_calculation(f,
+                                       b->rgb_src_factor,
+                                       b->colormask,
+                                       blend_color,
+                                       frag, pixel, rgb_factor);
+         src_factor[0] = rgb_factor[0];
+         src_factor[1] = rgb_factor[1];
+         src_factor[2] = rgb_factor[2];
+      }
+
+      if (dst_shares_alpha && (dF[0] == sF[3])) {
+         dst_factor[0] = src_factor[3];
+         dst_factor[1] = src_factor[3];
+         dst_factor[2] = src_factor[3];
+      } else if (dst_shares_alpha && (dF[0] == dF[3])) {
+         dst_factor[0] = dst_factor[3];
+         dst_factor[1] = dst_factor[3];
+         dst_factor[2] = dst_factor[3];
+      } else if (dF[0] == sF[0]) {
+         dst_factor[0] = src_factor[0];
+         dst_factor[1] = src_factor[1];
+         dst_factor[2] = src_factor[2];
+      } else {
+         int rgb_factor[4];
+
+         emit_color_factor_calculation(f,
+                                       b->rgb_dst_factor,
+                                       b->colormask,
+                                       blend_color,
+                                       frag, pixel, rgb_factor);
+         dst_factor[0] = rgb_factor[0];
+         dst_factor[1] = rgb_factor[1];
+         dst_factor[2] = rgb_factor[2];
+      }
+   }
+
+
+   func[0] = b->rgb_func;
+   func[1] = func[0];
+   func[2] = func[0];
+   func[3] = b->alpha_func;
+
+   for (i = 0; i < 4; ++i) {
+      if ((b->colormask & (1U << i)) != 0) {
+         /* With blending disabled the fragment value passes through
+          * unchanged; only the fragment mask is applied.
+          */
+         if (b->blend_enable) {
+            emit_blend_calculation(f,
+                                   func[i], sF[i], dF[i],
+                                   frag[i], src_factor[i],
+                                   pixel[i], dst_factor[i]);
+         }
+         spe_selb(f, frag[i], pixel[i], frag[i], mask);
+      } else {
+         /* Channel is write-masked: keep the existing pixel value. */
+         spe_or(f, frag[i], pixel[i], pixel[i]);
+      }
+   }
+
+   spe_bi(f, 0, 0, 0);
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h
new file mode 100644
index 0000000000..541c3b3be0
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h
@@ -0,0 +1,35 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CELL_STATE_PER_FRAGMENT_H
+#define CELL_STATE_PER_FRAGMENT_H
+
+/**
+ * Generate the SPE code for the alpha, depth and stencil tests described by
+ * \c cdsa->base, storing the instructions in \c cdsa->code.
+ */
+extern void
+cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa);
+
+/**
+ * Generate the SPE code that performs alpha blending as described by
+ * \c cb->base, storing the instructions in \c cb->code.
+ *
+ * \param cb           Blend state object that also owns the generated code
+ * \param blend_color  Constant blend color used by the constant-color factors
+ */
+extern void
+cell_generate_alpha_blend(struct cell_blend_state *cb,
+ const struct pipe_blend_color *blend_color);
+
+#endif /* CELL_STATE_PER_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
index c071de1900..115ca8cd90 100644
--- a/src/gallium/drivers/cell/spu/Makefile
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -19,6 +19,7 @@ SOURCES = \
spu_main.c \
spu_blend.c \
spu_dcache.c \
+ spu_per_fragment_op.c \
spu_render.c \
spu_texture.c \
spu_tile.c \
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 59300028d4..937962285d 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -58,6 +58,9 @@ struct spu_vs_context draw;
static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX]
ALIGN16_ATTRIB;
+static unsigned char depth_stencil_code_buffer[4 * 64]
+ ALIGN16_ATTRIB;
+
/**
* Tell the PPU that this SPU has finished copying a buffer to
* local store and that it may be reused by the PPU.
@@ -215,12 +218,19 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
- if (spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM)
+ switch (spu.fb.depth_format) {
+ case PIPE_FORMAT_Z32_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
spu.fb.zsize = 4;
- else if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM)
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
spu.fb.zsize = 2;
- else
+ break;
+ default:
spu.fb.zsize = 0;
+ break;
+ }
if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM)
spu.color_shuffle = ((vector unsigned char) {
@@ -248,14 +258,35 @@ cmd_state_blend(const struct pipe_blend_state *state)
static void
-cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state)
+cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state)
{
if (Debug)
printf("SPU %u: DEPTH_STENCIL: ztest %d\n",
spu.init.id,
- state->depth.enabled);
+ state->read_depth);
+
+ ASSERT_ALIGN16(state->base);
+
+ if (state->size != 0) {
+ mfc_get(depth_stencil_code_buffer,
+ (unsigned int) state->base, /* src */
+ ROUNDUP16(state->size),
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+ } else {
+ /* If there is no code, emit a return instruction.
+ */
+ depth_stencil_code_buffer[0] = 0x35;
+ depth_stencil_code_buffer[1] = 0x00;
+ depth_stencil_code_buffer[2] = 0x00;
+ depth_stencil_code_buffer[3] = 0x00;
+ }
- memcpy(&spu.depth_stencil, state, sizeof(*state));
+ spu.frag_test = (frag_test_func) depth_stencil_code_buffer;
+ spu.read_depth = state->read_depth;
+ spu.read_stencil = state->read_stencil;
}
@@ -415,9 +446,9 @@ cmd_batch(uint opcode)
pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8);
break;
case CELL_CMD_STATE_DEPTH_STENCIL:
- cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *)
+ cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *)
&buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8);
+ pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8);
break;
case CELL_CMD_STATE_SAMPLER:
cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]);
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index a13edd1702..444e218645 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -56,6 +56,17 @@ typedef union {
#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
+struct spu_frag_test_results {
+ qword mask;
+ qword depth;
+ qword stencil;
+};
+
+typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask,
+ qword pixel_depth, qword pixel_stencil, qword frag_depth,
+ qword frag_alpha, qword facing);
+
+
struct spu_framebuffer {
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
@@ -79,8 +90,9 @@ struct spu_global
struct cell_init_info init;
struct spu_framebuffer fb;
- struct pipe_blend_state blend_stencil;
- struct pipe_depth_stencil_alpha_state depth_stencil;
+ boolean read_depth;
+ boolean read_stencil;
+ frag_test_func frag_test;
struct pipe_blend_state blend;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
struct cell_command_texture texture;
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
new file mode 100644
index 0000000000..b4cffeeb32
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -0,0 +1,211 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file spu_per_fragment_op.c
+ * SPU implementation of various per-fragment operations.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#include "pipe/p_format.h"
+#include "spu_main.h"
+#include "spu_per_fragment_op.h"
+
+#define ZERO 0x80
+
+/**
+ * Read the depth and stencil values of one quad from a tile.
+ *
+ * \param buffer        Tile to read from
+ * \param x             X position of the quad within the tile
+ * \param y             Y position of the quad within the tile
+ * \param depth_format  Format of the data stored in the tile
+ * \param depth         Receives the depth values, one per word
+ * \param stencil       Receives the stencil values, one per word; set to
+ *                      zero for formats without stencil
+ */
+static void
+read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
+             enum pipe_format depth_format, qword *depth,
+             qword *stencil)
+{
+   const int ix = x / 2;
+   const int iy = y / 2;
+
+   switch (depth_format) {
+   case PIPE_FORMAT_Z16_UNORM: {
+      qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
+
+      /* Expand four 16-bit values into the low halves of four words. */
+      const qword shuf_vec = (qword) {
+         ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
+         ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7
+      };
+
+
+      /* At even X values we want the first 4 shorts, and at odd X values we
+       * want the second 4 shorts.
+       */
+      qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3));
+      qword bias_mask = si_fsmbi(0x3333);
+      qword sv = si_a(shuf_vec, si_and(bias_mask, bias));
+
+      *depth = si_shufb(*ptr, *ptr, sv);
+      *stencil = si_il(0);
+      break;
+   }
+
+
+   case PIPE_FORMAT_Z32_UNORM: {
+      qword *ptr = (qword *) &buffer->ui4[iy][ix];
+
+      *depth = *ptr;
+      *stencil = si_il(0);
+      break;
+   }
+
+
+   case PIPE_FORMAT_Z24S8_UNORM: {
+      qword *ptr = (qword *) &buffer->ui4[iy][ix];
+      qword mask = si_fsmbi(0xEEEE);
+
+      /* Depth lives in the upper 24 bits.  Use a logical (not arithmetic)
+       * right shift so that depth values with the top bit set are not
+       * sign-extended; the shift itself discards the stencil byte.
+       */
+      *depth = si_rotmi(*ptr, -8);
+      *stencil = si_andc(*ptr, mask);
+      break;
+   }
+
+
+   case PIPE_FORMAT_S8Z24_UNORM: {
+      qword *ptr = (qword *) &buffer->ui4[iy][ix];
+
+      /* Depth is the low 24 bits; rotate the top byte down for stencil. */
+      *depth = si_and(*ptr, si_fsmbi(0x7777));
+      *stencil = si_andi(si_roti(*ptr, 8), 0x0ff);
+      break;
+   }
+
+
+   default:
+      assert(0);
+      break;
+   }
+}
+
+
+/**
+ * Write the depth and stencil values of one quad back into a tile.
+ *
+ * \param buffer        Tile to write to
+ * \param x             X position of the quad within the tile
+ * \param y             Y position of the quad within the tile
+ * \param depth_format  Format of the data stored in the tile
+ * \param depth         Depth values to store, one per word
+ * \param stencil       Stencil values to store, one per word; only used by
+ *                      the formats that contain stencil
+ */
+static void
+write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
+ enum pipe_format depth_format,
+ qword depth, qword stencil)
+{
+ const int ix = x / 2;
+ const int iy = y / 2;
+
+ (void) stencil;
+
+ switch (depth_format) {
+ case PIPE_FORMAT_Z16_UNORM: {
+ qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
+
+ /* Pack the low halfword of each depth word into one half of the
+ * destination quadword and keep the other half from the tile.
+ */
+ qword sv = ((ix & 0x01) == 0)
+ ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15,
+ 24, 25, 26, 27, 28, 29, 30, 31 }
+ : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23,
+ 2, 3, 6, 7, 10, 11, 14, 15 };
+ *ptr = si_shufb(depth, *ptr, sv);
+ break;
+ }
+
+
+ case PIPE_FORMAT_Z32_UNORM: {
+ qword *ptr = (qword *) &buffer->ui4[iy][ix];
+ *ptr = depth;
+ break;
+ }
+
+
+ case PIPE_FORMAT_Z24S8_UNORM: {
+ qword *ptr = (qword *) &buffer->ui4[iy][ix];
+ qword mask = si_fsmbi(0xEEEE);
+
+ /* Depth goes in the upper three bytes, stencil in the lowest byte. */
+ depth = si_shli(depth, 8);
+ *ptr = si_selb(stencil, depth, mask);
+ break;
+ }
+
+
+ case PIPE_FORMAT_S8Z24_UNORM: {
+ qword *ptr = (qword *) &buffer->ui4[iy][ix];
+ qword mask = si_fsmbi(0x7777);
+
+ /* Stencil goes in the top byte, depth in the lower three bytes. */
+ stencil = si_shli(stencil, 24);
+ *ptr = si_selb(stencil, depth, mask);
+ break;
+ }
+
+
+ default:
+ assert(0);
+ break;
+ }
+}
+
+
+/**
+ * Run the uploaded fragment-test code on one quad.
+ *
+ * Reads the stored depth / stencil values when the current state needs them,
+ * converts the fragment depth to the integer range of the depth format,
+ * calls the generated test function and writes the results back.
+ *
+ * \param x           X position of the quad within the tile
+ * \param y           Y position of the quad within the tile
+ * \param frag_mask   Mask of live fragments in the quad
+ * \param frag_depth  Floating-point fragment depth values
+ * \param frag_alpha  Fragment alpha values
+ * \param facing      Facing value forwarded to the generated code
+ *
+ * \return The mask of fragments that passed all of the tests.
+ */
+qword
+spu_do_depth_stencil(int x, int y,
+                     qword frag_mask, qword frag_depth, qword frag_alpha,
+                     qword facing)
+{
+   struct spu_frag_test_results result;
+
+   /* Give the stored values a defined state even when the tile is not read,
+    * since they are passed to the generated code unconditionally.
+    */
+   qword pixel_depth = si_il(0);
+   qword pixel_stencil = si_il(0);
+
+   /* All of this preamble code (everything before the call to frag_test)
+    * should be generated on the PPU and uploaded to the SPU.
+    */
+   if (spu.read_depth || spu.read_stencil) {
+      read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
+                   &pixel_depth, &pixel_stencil);
+   }
+
+   /* Scale the fragment depth to the maximum value of the depth format and
+    * convert it to an unsigned integer.
+    */
+   switch (spu.fb.depth_format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu)));
+      frag_depth = si_cfltu(frag_depth, 0);
+      break;
+   case PIPE_FORMAT_Z32_UNORM:
+      frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu)));
+      frag_depth = si_cfltu(frag_depth, 0);
+      break;
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+      frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu)));
+      frag_depth = si_cfltu(frag_depth, 0);
+      break;
+   default:
+      ASSERT(0);
+      break;
+   }
+
+   result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil,
+                             frag_depth, frag_alpha, facing);
+
+
+   /* This code (everything after the call to frag_test) should
+    * be generated on the PPU and uploaded to the SPU.
+    */
+   if (spu.read_depth || spu.read_stencil) {
+      write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
+                    result.depth, result.stencil);
+   }
+
+   return result.mask;
+}
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
new file mode 100644
index 0000000000..6571258699
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
@@ -0,0 +1,32 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SPU_PER_FRAGMENT_OP
+#define SPU_PER_FRAGMENT_OP
+
+extern qword
+spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth,
+ qword frag_alpha, qword facing);
+
+#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 20e77aa2e6..6df59abd36 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -98,7 +98,7 @@ my_tile(uint tx, uint ty)
static INLINE void
get_cz_tiles(uint tx, uint ty)
{
- if (spu.depth_stencil.depth.enabled) {
+ if (spu.read_depth) {
if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
//printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
@@ -153,7 +153,7 @@ static INLINE void
wait_put_cz_tiles(void)
{
wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
- if (spu.depth_stencil.depth.enabled) {
+ if (spu.read_depth) {
wait_on_mask(1 << TAG_WRITE_TILE_Z);
}
}
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h
index 3105b848fd..1b5491112d 100644
--- a/src/gallium/drivers/cell/spu/spu_tile.h
+++ b/src/gallium/drivers/cell/spu/spu_tile.h
@@ -56,13 +56,13 @@ clear_c_tile(tile_t *ctile)
static INLINE void
clear_z_tile(tile_t *ztile)
{
- if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
+ if (spu.fb.zsize == 2) {
memset16((ushort*) ztile->us,
spu.fb.depth_clear_value,
TILE_SIZE * TILE_SIZE);
}
else {
- ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM);
+ ASSERT(spu.fb.zsize != 0);
memset32((uint*) ztile->ui,
spu.fb.depth_clear_value,
TILE_SIZE * TILE_SIZE);
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index be9624cf7d..81823f2463 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -38,8 +38,7 @@
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_tri.h"
-
-#include "spu_ztest.h"
+#include "spu_per_fragment_op.h"
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
@@ -264,16 +263,12 @@ do_depth_test(int x, int y, mask_t quadmask)
zvals.v = eval_z((float) x, (float) y);
- if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
- int ix = (x - setup.cliprect_minx) / 4;
- int iy = (y - setup.cliprect_miny) / 2;
- mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask);
- }
- else {
- int ix = (x - setup.cliprect_minx) / 2;
- int iy = (y - setup.cliprect_miny) / 2;
- mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask);
- }
+ mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx,
+ y - setup.cliprect_miny,
+ (qword) quadmask,
+ (qword) zvals.v,
+ (qword) spu_splats((unsigned char) 0x0ffu),
+ (qword) spu_splats((unsigned int) 0x01u));
if (spu_extract(spu_orx(mask), 0))
spu.cur_ztile_status = TILE_STATUS_DIRTY;
@@ -299,7 +294,7 @@ emit_quad( int x, int y, mask_t mask )
sp->quad.first->run(sp->quad.first, &setup.quad);
#else
- if (spu.depth_stencil.depth.enabled) {
+ if (spu.read_depth) {
mask = do_depth_test(x, y, mask);
}
@@ -434,7 +429,7 @@ static void flush_spans( void )
}
ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
- if (spu.depth_stencil.depth.enabled) {
+ if (spu.read_depth) {
if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
/* wait for mfc_get() to complete */
//printf("SPU: %u: waiting for ztile\n", spu.init.id);
diff --git a/src/gallium/drivers/cell/spu/spu_ztest.h b/src/gallium/drivers/cell/spu/spu_ztest.h
deleted file mode 100644
index ce8ad00339..0000000000
--- a/src/gallium/drivers/cell/spu/spu_ztest.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * Zbuffer/depth test code.
- */
-
-
-#ifndef SPU_ZTEST_H
-#define SPU_ZTEST_H
-
-
-#ifdef __SPU__
-#include <spu_intrinsics.h>
-#endif
-
-
-
-/**
- * Perform Z testing for a 16-bit/value Z buffer.
- *
- * \param zvals vector of four fragment zvalues as floats
- * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this
- * contains the Z values for 2 quads, 8 pixels.
- * \param x x coordinate of quad (only lsbit is significant)
- * \param inMask indicates which fragments in the quad are alive
- * \return new mask indicating which fragments are alive after ztest
- */
-static INLINE vector unsigned int
-spu_z16_test_less(vector float zvals, vector unsigned short *zbuf,
- uint x, vector unsigned int inMask)
-{
-#define ZERO 0x80
- vector unsigned int zvals_ui4, zbuf_ui4, mask;
-
- /* convert floats to uints in [0, 65535] */
- zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */
- zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */
-
- /* XXX this conditional could be removed with a bit of work */
- if (x & 1) {
- /* convert zbuffer values from ushorts to uints */
- /* gather lower four ushorts */
- zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
- (vector unsigned int) *zbuf,
- ((vector unsigned char) {
- ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11,
- ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15}));
- /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */
- mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
- /* mask &= inMask */
- mask = spu_and(mask, inMask);
- /* zbuf = mask ? zval : zbuf */
- zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
- /* convert zbuffer values from uints back to ushorts, preserve lower 4 */
- *zbuf = (vector unsigned short)
- spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
- ((vector unsigned char) {
- 16, 17, 18, 19, 20, 21, 22, 23,
- 2, 3, 6, 7, 10, 11, 14, 15}));
- }
- else {
- /* convert zbuffer values from ushorts to uints */
- /* gather upper four ushorts */
- zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
- (vector unsigned int) *zbuf,
- ((vector unsigned char) {
- ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
- ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7}));
- /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */
- mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
- /* mask &= inMask */
- mask = spu_and(mask, inMask);
- /* zbuf = mask ? zval : zbuf */
- zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
- /* convert zbuffer values from uints back to ushorts, preserve upper 4 */
- *zbuf = (vector unsigned short)
- spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
- ((vector unsigned char) {
- 2, 3, 6, 7, 10, 11, 14, 15,
- 24, 25, 26, 27, 28, 29, 30, 31}));
- }
- return mask;
-#undef ZERO
-}
-
-
-/**
- * As above, but Zbuffer values as 32-bit uints
- */
-static INLINE vector unsigned int
-spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr,
- vector unsigned int inMask)
-{
- vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr;
-
- /* convert floats to uints in [0, 0xffffffff] */
- zvals_ui4 = spu_convtu(zvals, 32);
- /* mask = (zbuf < zvals_ui4) ? ~0 : 0 */
- mask = spu_cmpgt(zbuf, zvals_ui4);
- /* mask &= inMask */
- mask = spu_and(mask, inMask);
- /* zbuf = mask ? zval : zbuf */
- *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask);
-
- return mask;
-}
-
-
-#endif /* SPU_ZTEST_H */
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index 9d5f609220..eb64f51943 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -116,7 +116,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render,
}
-static void
+static boolean
i915_vbuf_render_set_primitive( struct vbuf_render *render,
unsigned prim )
{
@@ -125,15 +125,17 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render,
switch(prim) {
case PIPE_PRIM_POINTS:
i915_render->hwprim = PRIM3D_POINTLIST;
- break;
+ return TRUE;
case PIPE_PRIM_LINES:
i915_render->hwprim = PRIM3D_LINELIST;
- break;
+ return TRUE;
case PIPE_PRIM_TRIANGLES:
i915_render->hwprim = PRIM3D_TRILIST;
- break;
+ return TRUE;
default:
- assert(0);
+ /* Actually, can handle a lot more just fine... Fixme.
+ */
+ return FALSE;
}
}
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index db0913cb2b..d940718ed2 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -101,11 +101,20 @@ sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
}
-static void
+static boolean
sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
- cvbr->prim = prim;
+ if (prim == PIPE_PRIM_TRIANGLES ||
+ prim == PIPE_PRIM_LINES ||
+ prim == PIPE_PRIM_POINTS) {
+ cvbr->prim = prim;
+ return TRUE;
+ }
+ else {
+ return FALSE;
+ }
+
}
@@ -207,6 +216,27 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
(struct vertex_header *) ((char *) vertex_buffer + (I) * vertex_size)
switch (cvbr->prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < nr; i++) {
+ prim.v[0] = VERTEX(i);
+ setup->point( setup, &prim );
+ }
+ break;
+ case PIPE_PRIM_LINES:
+ assert(nr % 2 == 0);
+ for (i = 0; i < nr; i += 2) {
+ prim.v[0] = VERTEX(i);
+ prim.v[1] = VERTEX(i + 1);
+ setup->line( setup, &prim );
+ }
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ for (i = 1; i < nr; i++) {
+ prim.v[0] = VERTEX(i - 1);
+ prim.v[1] = VERTEX(i);
+ setup->line( setup, &prim );
+ }
+ break;
case PIPE_PRIM_TRIANGLES:
assert(nr % 3 == 0);
for (i = 0; i < nr; i += 3) {
@@ -217,6 +247,58 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
setup->tri( setup, &prim );
}
break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ assert(nr >= 3);
+ for (i = 2; i < nr; i++) {
+ prim.v[0] = VERTEX(i - 2);
+ prim.v[1] = VERTEX(i - 1);
+ prim.v[2] = VERTEX(i);
+ calc_det(&prim);
+ setup->tri( setup, &prim );
+ }
+ break;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ assert(nr >= 3);
+ for (i = 2; i < nr; i++) {
+ prim.v[0] = VERTEX(0);
+ prim.v[1] = VERTEX(i - 1);
+ prim.v[2] = VERTEX(i);
+ calc_det(&prim);
+ setup->tri( setup, &prim );
+ }
+ break;
+ case PIPE_PRIM_QUADS:
+ assert(nr % 4 == 0);
+ for (i = 0; i < nr; i += 4) {
+ prim.v[0] = VERTEX(i + 0);
+ prim.v[1] = VERTEX(i + 1);
+ prim.v[2] = VERTEX(i + 2);
+ calc_det(&prim);
+ setup->tri( setup, &prim );
+
+ prim.v[0] = VERTEX(i + 0);
+ prim.v[1] = VERTEX(i + 2);
+ prim.v[2] = VERTEX(i + 3);
+ calc_det(&prim);
+ setup->tri( setup, &prim );
+ }
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ assert(nr >= 4);
+ for (i = 2; i < nr; i += 2) {
+ prim.v[0] = VERTEX(i - 2);
+ prim.v[1] = VERTEX(i);
+ prim.v[2] = VERTEX(i + 1);
+ calc_det(&prim);
+ setup->tri( setup, &prim );
+
+ prim.v[0] = VERTEX(i - 2);
+ prim.v[1] = VERTEX(i + 1);
+ prim.v[2] = VERTEX(i - 1);
+ calc_det(&prim);
+ setup->tri( setup, &prim );
+ }
+ break;
case PIPE_PRIM_POLYGON:
/* draw as tri fan */
for (i = 2; i < nr; i++) {
diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h
index f971ad3adc..f3dfa06216 100644
--- a/src/gallium/include/pipe/p_debug.h
+++ b/src/gallium/include/pipe/p_debug.h
@@ -158,6 +158,16 @@ void debug_mask_vprintf(uint32_t uuid,
const char *format,
va_list ap);
+
+#ifdef DEBUG
+#define debug_warning(__msg) \
+ debug_printf("%s:%i:warning: %s\n", __FILE__, __LINE__, (__msg))
+#else
+#define debug_warning(__msg) \
+ ((void)0)
+#endif
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h
index ef36ce75f7..c2e0f8c6a5 100644
--- a/src/gallium/include/pipe/p_util.h
+++ b/src/gallium/include/pipe/p_util.h
@@ -88,14 +88,16 @@ FREE( void *ptr )
static INLINE void *
REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
{
- void *new_ptr;
- if( new_size <= old_size ) {
- return old_ptr;
- }
- new_ptr = MALLOC( new_size );
- if( new_ptr ) {
- memcpy( new_ptr, old_ptr, old_size );
+ void *new_ptr = NULL;
+
+ if (new_size != 0) {
+ new_ptr = MALLOC( new_size );
+
+ if( new_ptr && old_ptr ) {
+ memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size ); /* clamp: a shrinking realloc must not write past new_ptr */
+ }
}
+
FREE( old_ptr );
return new_ptr;
}
diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c
index e385b9d997..9e5169eff1 100644
--- a/src/mesa/shader/arbprogparse.c
+++ b/src/mesa/shader/arbprogparse.c
@@ -1576,9 +1576,6 @@ parse_attrib_binding(GLcontext * ctx, const GLubyte ** inst,
if (err) {
program_error(ctx, Program->Position, "Bad attribute binding");
}
- else {
- Program->Base.InputsRead |= (1 << *inputReg);
- }
return err;
}
@@ -2557,6 +2554,11 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst,
return 1;
}
+ /* Add attributes to InputsRead only if they are used by the program.
+ * This avoids the handling of unused ATTRIB declarations in the drivers. */
+ if (*File == PROGRAM_INPUT)
+ Program->Base.InputsRead |= (1 << *Index);
+
return 0;
}
diff --git a/src/mesa/sources b/src/mesa/sources
index e3d5f22849..287af7121a 100644
--- a/src/mesa/sources
+++ b/src/mesa/sources
@@ -172,6 +172,7 @@ STATETRACKER_SOURCES = \
state_tracker/st_atom_texture.c \
state_tracker/st_atom_viewport.c \
state_tracker/st_cb_accum.c \
+ state_tracker/st_cb_blit.c \
state_tracker/st_cb_bufferobjects.c \
state_tracker/st_cb_clear.c \
state_tracker/st_cb_flush.c \
diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
new file mode 100644
index 0000000000..dfa79c975c
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -0,0 +1,125 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Brian Paul
+ */
+
+#include "main/imports.h"
+#include "main/image.h"
+#include "main/macros.h"
+#include "main/texformat.h"
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+
+#include "st_context.h"
+#include "st_program.h"
+#include "st_cb_drawpixels.h"
+#include "st_cb_blit.h"
+#include "st_cb_fbo.h"
+
+#include "util/u_blit.h"
+
+#include "cso_cache/cso_context.h"
+
+
+void
+st_init_blit(struct st_context *st)
+{
+ st->blit = util_create_blit(st->pipe);
+}
+
+
+void
+st_destroy_blit(struct st_context *st)
+{
+ util_destroy_blit(st->blit);
+ st->blit = NULL;
+}
+
+
+static void
+st_BlitFramebuffer(GLcontext *ctx,
+ GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+ GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+ GLbitfield mask, GLenum filter)
+{
+ struct st_context *st = ctx->st;
+ struct pipe_context *pipe = st->pipe;
+
+ const uint pFilter = ((filter == GL_NEAREST)
+ ? PIPE_TEX_MIPFILTER_NEAREST
+ : PIPE_TEX_MIPFILTER_LINEAR);
+
+ if (mask & GL_COLOR_BUFFER_BIT) {
+ struct st_renderbuffer *srcRb =
+ st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+ struct st_renderbuffer *dstRb =
+ st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0][0]);
+ struct pipe_surface *srcSurf = srcRb->surface;
+ struct pipe_surface *dstSurf = dstRb->surface;
+
+ srcY0 = srcRb->Base.Height - srcY0;
+ srcY1 = srcRb->Base.Height - srcY1;
+
+ dstY0 = dstRb->Base.Height - dstY0;
+ dstY1 = dstRb->Base.Height - dstY1;
+
+ util_blit_pixels(st->blit,
+ srcSurf, srcX0, srcY0, srcX1, srcY1,
+ dstSurf, dstX0, dstY0, dstX1, dstY1,
+ 0.0, pFilter);
+
+ }
+
+#if 0
+ /* XXX is this sufficient? */
+ st_invalidate_state(ctx, _NEW_COLOR | _NEW_TEXTURE);
+#else
+ /* need to "unset" cso state because we went behind the back of the cso
+ * tracker. Without unset, the _set_ calls would be no-ops.
+ */
+ cso_unset_blend(st->cso_context);
+ cso_unset_depth_stencil_alpha(st->cso_context);
+ cso_unset_rasterizer(st->cso_context);
+ cso_set_blend(st->cso_context, &st->state.blend);
+ cso_set_depth_stencil_alpha(st->cso_context, &st->state.depth_stencil);
+ cso_set_rasterizer(st->cso_context, &st->state.rasterizer);
+ pipe->bind_fs_state(pipe, st->fp->driver_shader);
+ pipe->bind_vs_state(pipe, st->vp->driver_shader);
+#endif
+}
+
+
+
+void
+st_init_blit_functions(struct dd_function_table *functions)
+{
+ functions->BlitFramebuffer = st_BlitFramebuffer;
+}
diff --git a/src/mesa/state_tracker/st_cb_blit.h b/src/mesa/state_tracker/st_cb_blit.h
new file mode 100644
index 0000000000..ed22986b53
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_blit.h
@@ -0,0 +1,46 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef ST_CB_BLIT_H
+#define ST_CB_BLIT_H
+
+
+#include "st_context.h"
+
+
+
+extern void
+st_init_blit(struct st_context *st);
+
+extern void
+st_destroy_blit(struct st_context *st);
+
+extern void
+st_init_blit_functions(struct dd_function_table *functions);
+
+
+#endif /* ST_CB_BLIT_H */
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 5865071439..693cddedf7 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -48,6 +48,7 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "pipe/p_winsys.h"
+#include "util/u_pack_color.h"
#include "cso_cache/cso_context.h"
@@ -56,55 +57,6 @@
#define TEST_DRAW_PASSTHROUGH 0
-static GLuint
-color_value(enum pipe_format pipeFormat, const GLfloat color[4])
-{
- GLubyte r, g, b, a;
-
- UNCLAMPED_FLOAT_TO_UBYTE(r, color[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(g, color[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(b, color[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(a, color[3]);
-
- switch (pipeFormat) {
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- return (r << 24) | (g << 16) | (b << 8) | a;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- return (a << 24) | (r << 16) | (g << 8) | b;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- return (b << 24) | (g << 16) | (r << 8) | a;
- case PIPE_FORMAT_R5G6B5_UNORM:
- return ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
- default:
- assert(0);
- return 0;
- }
-}
-
-
-static uint
-depth_value(enum pipe_format pipeFormat, GLfloat value)
-{
- switch (pipeFormat) {
- case PIPE_FORMAT_Z16_UNORM:
- return (uint) (value * 0xffff);
- case PIPE_FORMAT_Z32_UNORM:
- /* special-case to avoid overflow */
- if (value == 1.0)
- return 0xffffffff;
- else
- return (uint) (value * 0xffffffff);
- case PIPE_FORMAT_S8Z24_UNORM:
- return (uint) (value * 0xffffff);
- case PIPE_FORMAT_Z24S8_UNORM:
- return ((uint) (value * 0xffffff)) << 8;
- default:
- assert(0);
- return 0;
- }
-}
-
-
static GLboolean
is_depth_stencil_format(enum pipe_format pipeFormat)
{
@@ -405,6 +357,8 @@ clear_with_quad(GLcontext *ctx,
st_invalidate_state(ctx, _NEW_COLOR | _NEW_DEPTH | _NEW_STENCIL);
#else
/* Restore pipe state */
+ cso_set_blend(st->cso_context, &st->state.blend);
+ cso_set_depth_stencil_alpha(st->cso_context, &st->state.depth_stencil);
cso_set_rasterizer(st->cso_context, &st->state.rasterizer);
pipe->bind_fs_state(pipe, st->fp->driver_shader);
pipe->bind_vs_state(pipe, st->vp->driver_shader);
@@ -518,7 +472,6 @@ check_clear_stencil_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb)
-
static void
clear_color_buffer(GLcontext *ctx, struct gl_renderbuffer *rb)
{
@@ -527,10 +480,10 @@ clear_color_buffer(GLcontext *ctx, struct gl_renderbuffer *rb)
clear_with_quad(ctx, GL_TRUE, GL_FALSE, GL_FALSE);
}
else {
- struct st_renderbuffer *strb = st_renderbuffer(rb);
-
/* clear whole buffer w/out masking */
- uint clearValue = color_value(strb->surface->format, ctx->Color.ClearColor);
+ struct st_renderbuffer *strb = st_renderbuffer(rb);
+ uint clearValue;
+ util_pack_color(ctx->Color.ClearColor, strb->surface->format, &clearValue);
ctx->st->pipe->clear(ctx->st->pipe, strb->surface, clearValue);
}
}
@@ -547,7 +500,7 @@ clear_depth_buffer(GLcontext *ctx, struct gl_renderbuffer *rb)
struct st_renderbuffer *strb = st_renderbuffer(rb);
/* simple clear of whole buffer */
- uint clearValue = depth_value(strb->surface->format, ctx->Depth.Clear);
+ uint clearValue = util_pack_z(strb->surface->format, ctx->Depth.Clear);
ctx->st->pipe->clear(ctx->st->pipe, strb->surface, clearValue);
}
}
@@ -591,7 +544,7 @@ clear_depth_stencil_buffer(GLcontext *ctx, struct gl_renderbuffer *rb)
struct st_renderbuffer *strb = st_renderbuffer(rb);
/* clear whole buffer w/out masking */
- GLuint clearValue = depth_value(strb->surface->format, ctx->Depth.Clear);
+ GLuint clearValue = util_pack_z(strb->surface->format, ctx->Depth.Clear);
switch (strb->surface->format) {
case PIPE_FORMAT_S8Z24_UNORM:
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 5458ab420e..e1fc885e0e 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -36,6 +36,7 @@
#include "st_context.h"
#include "st_cb_accum.h"
#include "st_cb_bufferobjects.h"
+#include "st_cb_blit.h"
#include "st_cb_clear.h"
#include "st_cb_drawpixels.h"
#include "st_cb_fbo.h"
@@ -100,6 +101,7 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe )
st_init_atoms( st );
st_init_draw( st );
st_init_generate_mipmap(st);
+ st_init_blit(st);
for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
st->state.sampler_list[i] = &st->state.samplers[i];
@@ -151,6 +153,8 @@ static void st_destroy_context_priv( struct st_context *st )
draw_destroy(st->draw);
st_destroy_atoms( st );
st_destroy_draw( st );
+ st_destroy_generate_mipmap(st);
+ st_destroy_blit(st);
_vbo_DestroyContext(st->ctx);
@@ -217,6 +221,7 @@ void st_init_driver_functions(struct dd_function_table *functions)
st_init_accum_functions(functions);
st_init_bufferobject_functions(functions);
+ st_init_blit_functions(functions);
st_init_clear_functions(functions);
st_init_drawpixels_functions(functions);
st_init_fbo_functions(functions);
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index e81aebba3d..63150dbeaf 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -40,6 +40,9 @@ struct draw_context;
struct draw_stage;
struct cso_cache;
struct cso_blend;
+struct gen_mipmap_state;
+struct blit_state;
+
#define ST_NEW_MESA 0x1 /* Mesa state has changed */
#define ST_NEW_FRAGMENT_PROGRAM 0x2
@@ -146,18 +149,8 @@ struct st_context
struct st_fragment_program *combined_prog;
} bitmap;
- /** For gen/render mipmap feature */
- struct {
- struct pipe_blend_state blend;
- struct pipe_depth_stencil_alpha_state depthstencil;
- struct pipe_rasterizer_state rasterizer;
-
- void *blend_cso;
- void *depthstencil_cso;
- void *rasterizer_cso;
- struct st_fragment_program *stfp;
- struct st_vertex_program *stvp;
- } gen_mipmap;
+ struct gen_mipmap_state *gen_mipmap;
+ struct blit_state *blit;
struct cso_context *cso_context;
};
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 99d2a5fb9e..0962b5f74c 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -143,6 +143,7 @@ void st_init_extensions(struct st_context *st)
ctx->Extensions.EXT_blend_logic_op = GL_TRUE;
ctx->Extensions.EXT_blend_minmax = GL_TRUE;
ctx->Extensions.EXT_blend_subtract = GL_TRUE;
+ ctx->Extensions.EXT_framebuffer_blit = GL_TRUE;
ctx->Extensions.EXT_framebuffer_object = GL_TRUE;
ctx->Extensions.EXT_fog_coord = GL_TRUE;
ctx->Extensions.EXT_multi_draw_arrays = GL_TRUE;
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index 9c4e1032ef..6c3afca1ba 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -37,6 +37,8 @@
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
#include "pipe/p_winsys.h"
+#include "util/u_gen_mipmap.h"
+
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_context.h"
@@ -49,55 +51,6 @@
#include "st_cb_texture.h"
-
-static struct st_fragment_program *
-make_tex_fragment_program(GLcontext *ctx)
-{
- struct st_fragment_program *stfp;
- struct gl_program *p;
- GLuint ic = 0;
-
- p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!p)
- return NULL;
-
- p->NumInstructions = 2;
-
- p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
- if (!p->Instructions) {
- ctx->Driver.DeleteProgram(ctx, p);
- return NULL;
- }
- _mesa_init_instructions(p->Instructions, p->NumInstructions);
-
- /* TEX result.color, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLR;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
- p->Instructions[ic].TexSrcUnit = 0;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
-
- /* END; */
- p->Instructions[ic++].Opcode = OPCODE_END;
-
- assert(ic == p->NumInstructions);
-
- p->InputsRead = FRAG_BIT_TEX0;
- p->OutputsWritten = (1 << FRAG_RESULT_COLR);
-
- stfp = (struct st_fragment_program *) p;
-
- st_translate_fragment_program(ctx->st, stfp, NULL);
-
- return stfp;
-}
-
-
-
-
/**
* one-time init for generate mipmap
* XXX Note: there may be other times we need no-op/simple state like this.
@@ -106,117 +59,18 @@ make_tex_fragment_program(GLcontext *ctx)
void
st_init_generate_mipmap(struct st_context *st)
{
- struct pipe_context *pipe = st->pipe;
- struct pipe_blend_state blend;
- struct pipe_rasterizer_state rasterizer;
- struct pipe_depth_stencil_alpha_state depthstencil;
-
- /* we don't use blending, but need to set valid values */
- memset(&blend, 0, sizeof(blend));
- blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
- blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
- blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
- blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
- blend.colormask = PIPE_MASK_RGBA;
- st->gen_mipmap.blend = blend;
- st->gen_mipmap.blend_cso = pipe->create_blend_state(pipe, &blend);
-
- memset(&depthstencil, 0, sizeof(depthstencil));
- st->gen_mipmap.depthstencil_cso = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil);
-
- /* Note: we're assuming zero is valid for all non-specified fields */
- memset(&rasterizer, 0, sizeof(rasterizer));
- rasterizer.front_winding = PIPE_WINDING_CW;
- rasterizer.cull_mode = PIPE_WINDING_NONE;
- st->gen_mipmap.rasterizer_cso = pipe->create_rasterizer_state(pipe, &rasterizer);
-
- st->gen_mipmap.stfp = make_tex_fragment_program(st->ctx);
- st->gen_mipmap.stvp = st_make_passthrough_vertex_shader(st, GL_FALSE);
+ st->gen_mipmap = util_create_gen_mipmap(st->pipe);
}
void
-st_destroy_generate_mipmpap(struct st_context *st)
-{
- struct pipe_context *pipe = st->pipe;
-
- pipe->delete_blend_state(pipe, st->gen_mipmap.blend_cso);
- pipe->delete_depth_stencil_alpha_state(pipe, st->gen_mipmap.depthstencil_cso);
- pipe->delete_rasterizer_state(pipe, st->gen_mipmap.rasterizer_cso);
-
- /* XXX free stfp, stvp */
-}
-
-
-static void
-simple_viewport(struct pipe_context *pipe, uint width, uint height)
+st_destroy_generate_mipmap(struct st_context *st)
{
- struct pipe_viewport_state vp;
-
- vp.scale[0] = 0.5 * width;
- vp.scale[1] = -0.5 * height;
- vp.scale[2] = 1.0;
- vp.scale[3] = 1.0;
- vp.translate[0] = 0.5 * width;
- vp.translate[1] = 0.5 * height;
- vp.translate[2] = 0.0;
- vp.translate[3] = 0.0;
-
- pipe->set_viewport_state(pipe, &vp);
+ util_destroy_gen_mipmap(st->gen_mipmap);
+ st->gen_mipmap = NULL;
}
-
-/*
- * Draw simple [-1,1]x[-1,1] quad
- */
-static void
-draw_quad(GLcontext *ctx)
-{
- GLfloat verts[4][2][4]; /* four verts, two attribs, XYZW */
- GLuint i;
- GLfloat sLeft = 0.0, sRight = 1.0;
- GLfloat tTop = 1.0, tBot = 0.0;
- GLfloat x0 = -1.0, x1 = 1.0;
- GLfloat y0 = -1.0, y1 = 1.0;
-
- /* upper-left */
- verts[0][0][0] = x0; /* attr[0].x */
- verts[0][0][1] = y0; /* attr[0].y */
- verts[0][1][0] = sLeft; /* attr[1].s */
- verts[0][1][1] = tTop; /* attr[1].t */
-
- /* upper-right */
- verts[1][0][0] = x1;
- verts[1][0][1] = y0;
- verts[1][1][0] = sRight;
- verts[1][1][1] = tTop;
-
- /* lower-right */
- verts[2][0][0] = x1;
- verts[2][0][1] = y1;
- verts[2][1][0] = sRight;
- verts[2][1][1] = tBot;
-
- /* lower-left */
- verts[3][0][0] = x0;
- verts[3][0][1] = y1;
- verts[3][1][0] = sLeft;
- verts[3][1][1] = tBot;
-
- /* same for all verts: */
- for (i = 0; i < 4; i++) {
- verts[i][0][2] = 0.0; /*Z*/
- verts[i][0][3] = 1.0; /*W*/
- verts[i][1][2] = 0.0; /*R*/
- verts[i][1][3] = 1.0; /*Q*/
- }
-
- st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2, GL_TRUE);
-}
-
-
-
/**
* Generate mipmap levels using hardware rendering.
* \return TRUE if successful, FALSE if not possible
@@ -229,12 +83,7 @@ st_render_mipmap(struct st_context *st,
{
struct pipe_context *pipe = st->pipe;
struct pipe_screen *screen = pipe->screen;
- struct pipe_framebuffer_state fb;
- struct pipe_sampler_state sampler;
- void *sampler_cso;
- const uint face = _mesa_tex_target_to_face(target), zslice = 0;
- /*const uint first_level_save = pt->first_level;*/
- uint dstLevel;
+ const uint face = _mesa_tex_target_to_face(target);
assert(target != GL_TEXTURE_3D); /* not done yet */
@@ -243,66 +92,7 @@ st_render_mipmap(struct st_context *st,
return FALSE;
}
- /* init framebuffer state */
- memset(&fb, 0, sizeof(fb));
- fb.num_cbufs = 1;
-
- /* sampler state */
- memset(&sampler, 0, sizeof(sampler));
- sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
- sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
- sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
- sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
- sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
- sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
- sampler.normalized_coords = 1;
-
-
- /* bind state */
- cso_set_blend(st->cso_context, &st->gen_mipmap.blend);
- cso_set_depth_stencil_alpha(st->cso_context, &st->gen_mipmap.depthstencil);
- cso_set_rasterizer(st->cso_context, &st->gen_mipmap.rasterizer);
-
- /* bind shaders */
- pipe->bind_fs_state(pipe, st->gen_mipmap.stfp->driver_shader);
- pipe->bind_vs_state(pipe, st->gen_mipmap.stvp->driver_shader);
-
- /*
- * XXX for small mipmap levels, it may be faster to use the software
- * fallback path...
- */
- for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
- const uint srcLevel = dstLevel - 1;
-
- /*
- * Setup framebuffer / dest surface
- */
- fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice);
- pipe->set_framebuffer_state(pipe, &fb);
-
- /*
- * Setup sampler state
- */
- sampler.min_lod = sampler.max_lod = srcLevel;
- sampler_cso = pipe->create_sampler_state(pipe, &sampler);
- pipe->bind_sampler_states(pipe, 1, &sampler_cso);
-
- simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]);
-
- /*
- * Setup src texture, override pt->first_level so we sample from
- * the right mipmap level.
- */
- /*pt->first_level = srcLevel;*/
- pipe->set_sampler_textures(pipe, 1, &pt);
-
- draw_quad(st->ctx);
-
- pipe->delete_sampler_state(pipe, sampler_cso);
- }
-
- /* restore first_level */
- /*pt->first_level = first_level_save;*/
+ util_gen_mipmap(st->gen_mipmap, pt, face, baseLevel, lastLevel);
/* restore pipe state */
#if 0
diff --git a/src/mesa/state_tracker/st_gen_mipmap.h b/src/mesa/state_tracker/st_gen_mipmap.h
index 7668c1e44e..00fbae9302 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.h
+++ b/src/mesa/state_tracker/st_gen_mipmap.h
@@ -35,7 +35,7 @@ st_init_generate_mipmap(struct st_context *st);
extern void
-st_destroy_generate_mipmpap(struct st_context *st);
+st_destroy_generate_mipmap(struct st_context *st);
extern void
diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c
index ad4cc62d5f..a180441a5a 100644
--- a/src/mesa/tnl/t_vertex_sse.c
+++ b/src/mesa/tnl/t_vertex_sse.c
@@ -348,8 +348,6 @@ static GLboolean build_vertex_emit( struct x86_program *p )
struct x86_reg vp1 = x86_make_reg(file_XMM, 2);
GLubyte *fixup, *label;
- x86_init_func(&p->func);
-
/* Push a few regs?
*/
x86_push(&p->func, countEBP);
@@ -641,7 +639,7 @@ void _tnl_generate_sse_emit( GLcontext *ctx )
p.ctx = ctx;
p.inputs_safe = 0; /* for now */
- p.outputs_safe = 1; /* for now */
+ p.outputs_safe = 0; /* for now */
p.have_sse2 = cpu_has_xmm2;
p.identity = x86_make_reg(file_XMM, 6);
p.chan0 = x86_make_reg(file_XMM, 7);
diff --git a/src/mesa/vf/vf_sse.c b/src/mesa/vf/vf_sse.c
index 3ce76e2b04..c3a2166578 100644
--- a/src/mesa/vf/vf_sse.c
+++ b/src/mesa/vf/vf_sse.c
@@ -633,7 +633,7 @@ void vf_generate_sse_emit( struct vertex_fetch *vf )
p.vf = vf;
p.inputs_safe = 0; /* for now */
- p.outputs_safe = 1; /* for now */
+ p.outputs_safe = 0; /* for now */
p.have_sse2 = cpu_has_xmm2;
p.identity = x86_make_reg(file_XMM, 6);
p.chan0 = x86_make_reg(file_XMM, 7);