summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Keith Whitwell <keithw@vmware.com> 2009-07-24 16:49:35 +0100
committer José Fonseca <jfonseca@vmware.com> 2009-08-29 09:21:18 +0100
commit bdbb4beb21876010b14785569a920fa65a67d1ad (patch)
tree 40b8f2d0f6d4020c4229ab2e582f9b7e2a03bb5c /src
parent 4486012245c5f526059d3872ac3561f53705d1cf (diff)
llvmpipe: expand quad pipeline to process >1 quad at a time
This is part one -- we still only pass a single quad down, but the code can now cope with more. The quads must all be from the same tile.
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_alpha_test.c 106
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_blend.c 730
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_colormask.c 15
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_coverage.c 48
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_depth_test.c 23
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_earlyz.c 28
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_fs.c 40
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_occlusion.c 10
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_output.c 49
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_pipe.c 88
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_pipe.h 4
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_stencil.c 185
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_quad_stipple.c 48
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup.c 4
14 files changed, 745 insertions, 633 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_alpha_test.c b/src/gallium/drivers/llvmpipe/lp_quad_alpha_test.c
index eea4ef9c85..947daf5695 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_alpha_test.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_alpha_test.c
@@ -9,76 +9,80 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
+#define ALPHATEST( FUNC, COMP ) \
+ static void \
+ alpha_test_quads_##FUNC( struct quad_stage *qs, \
+ struct quad_header *quads[], \
+ unsigned nr ) \
+ { \
+ const float ref = qs->llvmpipe->depth_stencil->alpha.ref_value; \
+ const uint cbuf = 0; /* only output[0].alpha is tested */ \
+ unsigned pass_nr = 0; \
+ unsigned i; \
+ \
+ for (i = 0; i < nr; i++) { \
+ const float *aaaa = quads[i]->output.color[cbuf][3]; \
+ unsigned passMask = 0; \
+ \
+ if (aaaa[0] COMP ref) passMask |= (1 << 0); \
+ if (aaaa[1] COMP ref) passMask |= (1 << 1); \
+ if (aaaa[2] COMP ref) passMask |= (1 << 2); \
+ if (aaaa[3] COMP ref) passMask |= (1 << 3); \
+ \
+ quads[i]->inout.mask &= passMask; \
+ \
+ if (quads[i]->inout.mask) \
+ quads[pass_nr++] = quads[i]; \
+ } \
+ \
+ if (pass_nr) \
+ qs->next->run(qs->next, quads, pass_nr); \
+ }
+
+
+ALPHATEST( LESS, < )
+ALPHATEST( EQUAL, == )
+ALPHATEST( LEQUAL, <= )
+ALPHATEST( GREATER, > )
+ALPHATEST( NOTEQUAL, != )
+ALPHATEST( GEQUAL, >= )
+
+/* XXX: Incorporate into shader using KILP.
+ */
static void
-alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
+alpha_test_quad(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
{
- struct llvmpipe_context *llvmpipe = qs->llvmpipe;
- const float ref = llvmpipe->depth_stencil->alpha.ref_value;
- unsigned passMask = 0x0, j;
- const uint cbuf = 0; /* only output[0].alpha is tested */
- const float *aaaa = quad->output.color[cbuf][3];
-
- switch (llvmpipe->depth_stencil->alpha.func) {
- case PIPE_FUNC_NEVER:
- break;
+ switch (qs->llvmpipe->depth_stencil->alpha.func) {
case PIPE_FUNC_LESS:
- /*
- * If mask were an array [4] we could do this SIMD-style:
- * passMask = (quad->outputs.color[0][3] <= vec4(ref));
- */
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] < ref) {
- passMask |= (1 << j);
- }
- }
+ alpha_test_quads_LESS( qs, quads, nr );
break;
case PIPE_FUNC_EQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] == ref) {
- passMask |= (1 << j);
- }
- }
+ alpha_test_quads_EQUAL( qs, quads, nr );
break;
case PIPE_FUNC_LEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] <= ref) {
- passMask |= (1 << j);
- }
- }
+ alpha_test_quads_LEQUAL( qs, quads, nr );
break;
case PIPE_FUNC_GREATER:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] > ref) {
- passMask |= (1 << j);
- }
- }
+ alpha_test_quads_GREATER( qs, quads, nr );
break;
case PIPE_FUNC_NOTEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] != ref) {
- passMask |= (1 << j);
- }
- }
+ alpha_test_quads_NOTEQUAL( qs, quads, nr );
break;
case PIPE_FUNC_GEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (aaaa[j] >= ref) {
- passMask |= (1 << j);
- }
- }
+ alpha_test_quads_GEQUAL( qs, quads, nr );
break;
case PIPE_FUNC_ALWAYS:
- passMask = MASK_ALL;
+ assert(0); /* should be caught earlier */
+ qs->next->run(qs->next, quads, nr);
break;
+ case PIPE_FUNC_NEVER:
default:
- assert(0);
+ assert(0); /* should be caught earlier */
+ return;
}
-
- quad->inout.mask &= passMask;
-
- if (quad->inout.mask)
- qs->next->run(qs->next, quad);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_blend.c b/src/gallium/drivers/llvmpipe/lp_quad_blend.c
index 98603be52e..6beb964739 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_blend.c
@@ -117,10 +117,16 @@ do { \
static void
-logicop_quad(struct quad_stage *qs, struct quad_header *quad)
+logicop_quad(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
uint cbuf;
+ struct llvmpipe_cached_tile *
+ tile = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
+ quads[0]->input.x0,
+ quads[0]->input.y0);
/* loop over colorbuffer outputs */
for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
@@ -129,165 +135,161 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad)
uint *src4 = (uint *) src;
uint *dst4 = (uint *) dst;
uint *res4 = (uint *) res;
- struct llvmpipe_cached_tile *
- tile = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
- quad->input.x0, quad->input.y0);
- float (*quadColor)[4] = quad->output.color[cbuf];
uint i, j;
- /* get/swizzle dest colors */
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
- int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
- for (i = 0; i < 4; i++) {
- dest[i][j] = tile->data.color[y][x][i];
+ for (i = 0; i < nr; i++) {
+ struct quad_header *quad = quads[i];
+ float (*quadColor)[4] = quad->output.color[cbuf];
+
+ /* get/swizzle dest colors */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
+ int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+ for (i = 0; i < 4; i++) {
+ dest[i][j] = tile->data.color[y][x][i];
+ }
}
- }
- /* convert to ubyte */
- for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
- dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
- dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
- dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
- dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
-
- src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
- src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
- src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
- src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
- }
+ /* convert to ubyte */
+ for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
+ dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
+ dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
+ dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
+ dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
+
+ src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
+ src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
+ src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
+ src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
+ }
- switch (llvmpipe->blend->logicop_func) {
- case PIPE_LOGICOP_CLEAR:
- for (j = 0; j < 4; j++)
- res4[j] = 0;
- break;
- case PIPE_LOGICOP_NOR:
- for (j = 0; j < 4; j++)
- res4[j] = ~(src4[j] | dst4[j]);
- break;
- case PIPE_LOGICOP_AND_INVERTED:
- for (j = 0; j < 4; j++)
- res4[j] = ~src4[j] & dst4[j];
- break;
- case PIPE_LOGICOP_COPY_INVERTED:
- for (j = 0; j < 4; j++)
- res4[j] = ~src4[j];
- break;
- case PIPE_LOGICOP_AND_REVERSE:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j] & ~dst4[j];
- break;
- case PIPE_LOGICOP_INVERT:
- for (j = 0; j < 4; j++)
- res4[j] = ~dst4[j];
- break;
- case PIPE_LOGICOP_XOR:
- for (j = 0; j < 4; j++)
- res4[j] = dst4[j] ^ src4[j];
- break;
- case PIPE_LOGICOP_NAND:
- for (j = 0; j < 4; j++)
- res4[j] = ~(src4[j] & dst4[j]);
- break;
- case PIPE_LOGICOP_AND:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j] & dst4[j];
- break;
- case PIPE_LOGICOP_EQUIV:
- for (j = 0; j < 4; j++)
- res4[j] = ~(src4[j] ^ dst4[j]);
- break;
- case PIPE_LOGICOP_NOOP:
- for (j = 0; j < 4; j++)
- res4[j] = dst4[j];
- break;
- case PIPE_LOGICOP_OR_INVERTED:
- for (j = 0; j < 4; j++)
- res4[j] = ~src4[j] | dst4[j];
- break;
- case PIPE_LOGICOP_COPY:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j];
- break;
- case PIPE_LOGICOP_OR_REVERSE:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j] | ~dst4[j];
- break;
- case PIPE_LOGICOP_OR:
- for (j = 0; j < 4; j++)
- res4[j] = src4[j] | dst4[j];
- break;
- case PIPE_LOGICOP_SET:
- for (j = 0; j < 4; j++)
- res4[j] = ~0;
- break;
- default:
- assert(0);
- }
+ switch (llvmpipe->blend->logicop_func) {
+ case PIPE_LOGICOP_CLEAR:
+ for (j = 0; j < 4; j++)
+ res4[j] = 0;
+ break;
+ case PIPE_LOGICOP_NOR:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~(src4[j] | dst4[j]);
+ break;
+ case PIPE_LOGICOP_AND_INVERTED:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~src4[j] & dst4[j];
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~src4[j];
+ break;
+ case PIPE_LOGICOP_AND_REVERSE:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] & ~dst4[j];
+ break;
+ case PIPE_LOGICOP_INVERT:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~dst4[j];
+ break;
+ case PIPE_LOGICOP_XOR:
+ for (j = 0; j < 4; j++)
+ res4[j] = dst4[j] ^ src4[j];
+ break;
+ case PIPE_LOGICOP_NAND:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~(src4[j] & dst4[j]);
+ break;
+ case PIPE_LOGICOP_AND:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] & dst4[j];
+ break;
+ case PIPE_LOGICOP_EQUIV:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~(src4[j] ^ dst4[j]);
+ break;
+ case PIPE_LOGICOP_NOOP:
+ for (j = 0; j < 4; j++)
+ res4[j] = dst4[j];
+ break;
+ case PIPE_LOGICOP_OR_INVERTED:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~src4[j] | dst4[j];
+ break;
+ case PIPE_LOGICOP_COPY:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j];
+ break;
+ case PIPE_LOGICOP_OR_REVERSE:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] | ~dst4[j];
+ break;
+ case PIPE_LOGICOP_OR:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] | dst4[j];
+ break;
+ case PIPE_LOGICOP_SET:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~0;
+ break;
+ default:
+ assert(0);
+ }
- for (j = 0; j < 4; j++) {
- quadColor[j][0] = ubyte_to_float(res[j][0]);
- quadColor[j][1] = ubyte_to_float(res[j][1]);
- quadColor[j][2] = ubyte_to_float(res[j][2]);
- quadColor[j][3] = ubyte_to_float(res[j][3]);
+ for (j = 0; j < 4; j++) {
+ quadColor[j][0] = ubyte_to_float(res[j][0]);
+ quadColor[j][1] = ubyte_to_float(res[j][1]);
+ quadColor[j][2] = ubyte_to_float(res[j][2]);
+ quadColor[j][3] = ubyte_to_float(res[j][3]);
+ }
}
}
-
- /* pass quad to next stage */
- qs->next->run(qs->next, quad);
}
-
-
static void
-blend_quad(struct quad_stage *qs, struct quad_header *quad)
+blend_quads(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
{
static const float zero[4] = { 0, 0, 0, 0 };
static const float one[4] = { 1, 1, 1, 1 };
-
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
uint cbuf;
- if (llvmpipe->blend->logicop_enable) {
- logicop_quad(qs, quad);
- return;
- }
-
/* loop over colorbuffer outputs */
for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
float source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
struct llvmpipe_cached_tile *tile
= lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
- quad->input.x0, quad->input.y0);
- float (*quadColor)[4] = quad->output.color[cbuf];
- uint i, j;
-
- /* get/swizzle dest colors */
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
- int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
- for (i = 0; i < 4; i++) {
- dest[i][j] = tile->data.color[y][x][i];
+ quads[0]->input.x0,
+ quads[0]->input.y0);
+ uint q, i, j;
+
+ for (q = 0; q < nr; q++) {
+ struct quad_header *quad = quads[q];
+ float (*quadColor)[4] = quad->output.color[cbuf];
+
+ /* get/swizzle dest colors */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
+ int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+ for (i = 0; i < 4; i++) {
+ dest[i][j] = tile->data.color[y][x][i];
+ }
}
- }
- /*
- * Compute src/first term RGB
- */
- switch (llvmpipe->blend->rgb_src_factor) {
- case PIPE_BLENDFACTOR_ONE:
- VEC4_COPY(source[0], quadColor[0]); /* R */
- VEC4_COPY(source[1], quadColor[1]); /* G */
- VEC4_COPY(source[2], quadColor[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
- VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
- VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
+ /*
+ * Compute src/first term RGB
+ */
+ switch (llvmpipe->blend->rgb_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ VEC4_COPY(source[0], quadColor[0]); /* R */
+ VEC4_COPY(source[1], quadColor[1]); /* G */
+ VEC4_COPY(source[2], quadColor[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
+ VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
+ VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
{
const float *alpha = quadColor[3];
VEC4_MUL(source[0], quadColor[0], alpha); /* R */
@@ -295,12 +297,12 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], alpha); /* B */
}
break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
- VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
- VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_DST_ALPHA:
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
+ VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
+ VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
{
const float *alpha = dest[3];
VEC4_MUL(source[0], quadColor[0], alpha); /* R */
@@ -308,7 +310,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], alpha); /* B */
}
break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
{
const float *alpha = quadColor[3];
float diff[4], temp[4];
@@ -319,7 +321,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], temp); /* B */
}
break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_COLOR:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */
@@ -330,7 +332,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], comp); /* B */
}
break;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
{
float alpha[4];
VEC4_SCALAR(alpha, llvmpipe->blend_color.color[3]);
@@ -339,18 +341,18 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], alpha); /* B */
}
break;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- assert(0); /* to do */
- break;
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- assert(0); /* to do */
- break;
- case PIPE_BLENDFACTOR_ZERO:
- VEC4_COPY(source[0], zero); /* R */
- VEC4_COPY(source[1], zero); /* G */
- VEC4_COPY(source[2], zero); /* B */
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(source[0], zero); /* R */
+ VEC4_COPY(source[1], zero); /* G */
+ VEC4_COPY(source[2], zero); /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
@@ -361,7 +363,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
}
break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
{
float inv_alpha[4];
VEC4_SUB(inv_alpha, one, quadColor[3]);
@@ -370,7 +372,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
}
break;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
{
float inv_alpha[4];
VEC4_SUB(inv_alpha, one, dest[3]);
@@ -379,7 +381,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
}
break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, dest[0]); /* R */
@@ -390,7 +392,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
}
break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
{
float inv_comp[4];
/* R */
@@ -404,7 +406,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_comp);
}
break;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
{
float inv_alpha[4];
VEC4_SCALAR(inv_alpha, 1.0f - llvmpipe->blend_color.color[3]);
@@ -413,73 +415,73 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
}
break;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- assert(0); /* to do */
- break;
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- assert(0); /* to do */
- break;
- default:
- assert(0);
- }
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ assert(0); /* to do */
+ break;
+ default:
+ assert(0);
+ }
- /*
- * Compute src/first term A
- */
- switch (llvmpipe->blend->alpha_src_factor) {
- case PIPE_BLENDFACTOR_ONE:
- VEC4_COPY(source[3], quadColor[3]); /* A */
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_SRC_ALPHA:
+ /*
+ * Compute src/first term A
+ */
+ switch (llvmpipe->blend->alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ VEC4_COPY(source[3], quadColor[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
{
const float *alpha = quadColor[3];
VEC4_MUL(source[3], quadColor[3], alpha); /* A */
}
break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_DST_ALPHA:
- VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- /* multiply alpha by 1.0 */
- VEC4_COPY(source[3], quadColor[3]); /* A */
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ /* multiply alpha by 1.0 */
+ VEC4_COPY(source[3], quadColor[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
VEC4_MUL(source[3], quadColor[3], comp); /* A */
}
break;
- case PIPE_BLENDFACTOR_ZERO:
- VEC4_COPY(source[3], zero); /* A */
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(source[3], zero); /* A */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
{
float inv_alpha[4];
VEC4_SUB(inv_alpha, one, quadColor[3]);
VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
}
break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
{
float inv_alpha[4];
VEC4_SUB(inv_alpha, one, dest[3]);
VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
}
break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
{
float inv_comp[4];
/* A */
@@ -487,42 +489,42 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[3], quadColor[3], inv_comp);
}
break;
- default:
- assert(0);
- }
+ default:
+ assert(0);
+ }
- /*
- * Compute dest/second term RGB
- */
- switch (llvmpipe->blend->rgb_dst_factor) {
- case PIPE_BLENDFACTOR_ONE:
- /* dest = dest * 1 NO-OP, leave dest as-is */
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
- VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
- VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
- VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
- VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
- break;
- case PIPE_BLENDFACTOR_DST_ALPHA:
- VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
- VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
- VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
- VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
- VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- assert(0); /* illegal */
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
+ /*
+ * Compute dest/second term RGB
+ */
+ switch (llvmpipe->blend->rgb_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ /* dest = dest * 1 NO-OP, leave dest as-is */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
+ VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
+ VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
+ VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
+ VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
+ VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
+ VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
+ VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
+ VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ assert(0); /* illegal */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */
@@ -533,7 +535,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], comp); /* B */
}
break;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
@@ -542,17 +544,17 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], comp); /* B */
}
break;
- case PIPE_BLENDFACTOR_ZERO:
- VEC4_COPY(dest[0], zero); /* R */
- VEC4_COPY(dest[1], zero); /* G */
- VEC4_COPY(dest[2], zero); /* B */
- break;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- /* XXX what are these? */
- assert(0);
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(dest[0], zero); /* R */
+ VEC4_COPY(dest[1], zero); /* G */
+ VEC4_COPY(dest[2], zero); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ /* XXX what are these? */
+ assert(0);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
@@ -563,7 +565,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
}
break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
{
float one_minus_alpha[QUAD_SIZE];
VEC4_SUB(one_minus_alpha, one, quadColor[3]);
@@ -572,7 +574,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
}
break;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, dest[3]); /* A */
@@ -581,7 +583,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
}
break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, dest[0]); /* R */
@@ -592,7 +594,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
}
break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
{
float inv_comp[4];
/* R */
@@ -606,7 +608,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], inv_comp);
}
break;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
{
float inv_comp[4];
VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
@@ -615,138 +617,154 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], inv_comp);
}
break;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* XXX what are these? */
- assert(0);
- break;
- default:
- assert(0);
- }
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* XXX what are these? */
+ assert(0);
+ break;
+ default:
+ assert(0);
+ }
- /*
- * Compute dest/second term A
- */
- switch (llvmpipe->blend->alpha_dst_factor) {
- case PIPE_BLENDFACTOR_ONE:
- /* dest = dest * 1 NO-OP, leave dest as-is */
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_DST_ALPHA:
- VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- assert(0); /* illegal */
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_CONST_ALPHA:
+ /*
+ * Compute dest/second term A
+ */
+ switch (llvmpipe->blend->alpha_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ /* dest = dest * 1 NO-OP, leave dest as-is */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ assert(0); /* illegal */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
VEC4_MUL(dest[3], dest[3], comp); /* A */
}
break;
- case PIPE_BLENDFACTOR_ZERO:
- VEC4_COPY(dest[3], zero); /* A */
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(dest[3], zero); /* A */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
{
float one_minus_alpha[QUAD_SIZE];
VEC4_SUB(one_minus_alpha, one, quadColor[3]);
VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
}
break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, dest[3]); /* A */
VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
}
break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
{
float inv_comp[4];
VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
VEC4_MUL(dest[3], dest[3], inv_comp);
}
break;
- default:
- assert(0);
- }
+ default:
+ assert(0);
+ }
- /*
- * Combine RGB terms
- */
- switch (llvmpipe->blend->rgb_func) {
- case PIPE_BLEND_ADD:
- VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
- VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
- VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
- break;
- case PIPE_BLEND_SUBTRACT:
- VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
- VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
- VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
- break;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
- VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
- VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
- break;
- case PIPE_BLEND_MIN:
- VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
- VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
- VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
- break;
- case PIPE_BLEND_MAX:
- VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
- VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
- VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
- break;
- default:
- assert(0);
- }
+ /*
+ * Combine RGB terms
+ */
+ switch (llvmpipe->blend->rgb_func) {
+ case PIPE_BLEND_ADD:
+ VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
+ VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
+ VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
+ break;
+ case PIPE_BLEND_MIN:
+ VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ case PIPE_BLEND_MAX:
+ VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ default:
+ assert(0);
+ }
- /*
- * Combine A terms
- */
- switch (llvmpipe->blend->alpha_func) {
- case PIPE_BLEND_ADD:
- VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
- break;
- case PIPE_BLEND_SUBTRACT:
- VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
- break;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
- break;
- case PIPE_BLEND_MIN:
- VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
- break;
- case PIPE_BLEND_MAX:
- VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
- break;
- default:
- assert(0);
+ /*
+ * Combine A terms
+ */
+ switch (llvmpipe->blend->alpha_func) {
+ case PIPE_BLEND_ADD:
+ VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
+ break;
+ case PIPE_BLEND_MIN:
+ VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ case PIPE_BLEND_MAX:
+ VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ default:
+ assert(0);
+ }
}
-
} /* cbuf loop */
+}
+
+
+static void
+blend_quad(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+
+ if (llvmpipe->blend->logicop_enable) {
+ logicop_quad(qs, quads, nr);
+ }
+ else if (llvmpipe->blend->blend_enable) {
+ blend_quads(qs, quads, nr );
+ }
/* pass blended quad to next stage */
- qs->next->run(qs->next, quad);
+ qs->next->run(qs->next, quads, nr);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_colormask.c b/src/gallium/drivers/llvmpipe/lp_quad_colormask.c
index 205dea4882..df811a72d7 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_colormask.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_colormask.c
@@ -84,12 +84,23 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad)
if (!(llvmpipe->blend->colormask & PIPE_MASK_A))
COPY_4V(quadColor[3], dest[3]);
}
+}
+
+static void
+colormask_quads(struct quad_stage *qs, struct quad_header *quads[],
+ unsigned nr)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; i++)
+ colormask_quad(qs, quads[i]);
/* pass quad to next stage */
- qs->next->run(qs->next, quad);
+ qs->next->run(qs->next, quads, nr);
}
+
static void colormask_begin(struct quad_stage *qs)
{
qs->next->begin(qs->next);
@@ -108,7 +119,7 @@ struct quad_stage *lp_quad_colormask_stage( struct llvmpipe_context *llvmpipe )
stage->llvmpipe = llvmpipe;
stage->begin = colormask_begin;
- stage->run = colormask_quad;
+ stage->run = colormask_quads;
stage->destroy = colormask_destroy;
return stage;
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_coverage.c b/src/gallium/drivers/llvmpipe/lp_quad_coverage.c
index 01c5982e85..b7b531d836 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_coverage.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_coverage.c
@@ -42,33 +42,47 @@
/**
* Multiply quad's alpha values by the fragment coverage.
*/
-static void
+static INLINE void
coverage_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
- const uint prim = quad->input.prim;
+ uint cbuf;
+
+ /* loop over colorbuffer outputs */
+ for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
+ float (*quadColor)[4] = quad->output.color[cbuf];
+ unsigned j;
+ for (j = 0; j < QUAD_SIZE; j++) {
+ assert(quad->input.coverage[j] >= 0.0);
+ assert(quad->input.coverage[j] <= 1.0);
+ quadColor[3][j] *= quad->input.coverage[j];
+ }
+ }
+}
+
+
+/* XXX: Incorporate into shader after alpha_test.
+ */
+static void
+coverage_run(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+ const uint prim = quads[0]->input.prim;
+ unsigned i;
if ((llvmpipe->rasterizer->poly_smooth && prim == QUAD_PRIM_TRI) ||
(llvmpipe->rasterizer->line_smooth && prim == QUAD_PRIM_LINE) ||
(llvmpipe->rasterizer->point_smooth && prim == QUAD_PRIM_POINT)) {
- uint cbuf;
-
- /* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
- float (*quadColor)[4] = quad->output.color[cbuf];
- unsigned j;
- for (j = 0; j < QUAD_SIZE; j++) {
- assert(quad->input.coverage[j] >= 0.0);
- assert(quad->input.coverage[j] <= 1.0);
- quadColor[3][j] *= quad->input.coverage[j];
- }
- }
+
+ for (i = 0; i < nr; i++)
+ coverage_quad( qs, quads[i] );
}
- qs->next->run(qs->next, quad);
+ qs->next->run(qs->next, quads, nr);
}
-
static void coverage_begin(struct quad_stage *qs)
{
qs->next->begin(qs->next);
@@ -87,7 +101,7 @@ struct quad_stage *lp_quad_coverage_stage( struct llvmpipe_context *llvmpipe )
stage->llvmpipe = llvmpipe;
stage->begin = coverage_begin;
- stage->run = coverage_quad;
+ stage->run = coverage_run;
stage->destroy = coverage_destroy;
return stage;
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c b/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
index fdb64ac3b4..8ecd68393f 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
@@ -49,7 +49,7 @@
* Try to effectively do that with codegen...
*/
-void
+boolean
lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
@@ -193,6 +193,8 @@ lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
}
quad->inout.mask &= zmask;
+ if (quad->inout.mask == 0)
+ return FALSE;
if (llvmpipe->depth_stencil->depth.writemask) {
@@ -252,16 +254,25 @@ lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
assert(0);
}
}
+
+ return TRUE;
}
static void
-depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
+depth_test_quads(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
{
- lp_depth_test_quad(qs, quad);
+ unsigned i, pass = 0;
- if (quad->inout.mask)
- qs->next->run(qs->next, quad);
+ for (i = 0; i < nr; i++) {
+ if (lp_depth_test_quad(qs, quads[i]))
+ quads[pass++] = quads[i];
+ }
+
+ if (pass)
+ qs->next->run(qs->next, quads, pass);
}
@@ -283,7 +294,7 @@ struct quad_stage *lp_quad_depth_test_stage( struct llvmpipe_context *llvmpipe )
stage->llvmpipe = llvmpipe;
stage->begin = depth_test_begin;
- stage->run = depth_test_quad;
+ stage->run = depth_test_quads;
stage->destroy = depth_test_destroy;
return stage;
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_earlyz.c b/src/gallium/drivers/llvmpipe/lp_quad_earlyz.c
index e4b4c3b55c..915d2d9f78 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_earlyz.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_earlyz.c
@@ -43,20 +43,26 @@
static void
earlyz_quad(
struct quad_stage *qs,
- struct quad_header *quad )
+ struct quad_header *quads[],
+ unsigned nr )
{
- const float fx = (float) quad->input.x0;
- const float fy = (float) quad->input.y0;
- const float dzdx = quad->posCoef->dadx[2];
- const float dzdy = quad->posCoef->dady[2];
- const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+ const float a0z = quads[0]->posCoef->a0[2];
+ const float dzdx = quads[0]->posCoef->dadx[2];
+ const float dzdy = quads[0]->posCoef->dady[2];
+ unsigned i;
- quad->output.depth[0] = z0;
- quad->output.depth[1] = z0 + dzdx;
- quad->output.depth[2] = z0 + dzdy;
- quad->output.depth[3] = z0 + dzdx + dzdy;
+ for (i = 0; i < nr; i++) {
+ const float fx = (float) quads[i]->input.x0;
+ const float fy = (float) quads[i]->input.y0;
+ const float z0 = a0z + dzdx * fx + dzdy * fy;
- qs->next->run( qs->next, quad );
+ quads[i]->output.depth[0] = z0;
+ quads[i]->output.depth[1] = z0 + dzdx;
+ quads[i]->output.depth[2] = z0 + dzdy;
+ quads[i]->output.depth[3] = z0 + dzdx + dzdy;
+ }
+
+ qs->next->run( qs->next, quads, nr );
}
static void
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_fs.c b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
index cabc54155c..25518c09f4 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
@@ -68,21 +68,18 @@ quad_shade_stage(struct quad_stage *qs)
/**
* Execute fragment shader for the four fragments in the quad.
*/
-static void
+static boolean
shade_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct quad_shade_stage *qss = quad_shade_stage( qs );
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
struct tgsi_exec_machine *machine = qss->machine;
boolean z_written;
-
- /* Consts do not require 16 byte alignment. */
- machine->Consts = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
-
- machine->InterpCoefs = quad->coef;
/* run shader */
quad->inout.mask &= llvmpipe->fs->run( llvmpipe->fs, machine, quad );
+ if (quad->inout.mask == 0)
+ return FALSE;
/* store outputs */
z_written = FALSE;
@@ -129,11 +126,34 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
quad->output.depth[3] = z0 + dzdx + dzdy;
}
- /* shader may cull fragments */
- if (quad->inout.mask) {
- qs->next->run( qs->next, quad );
+ return TRUE;
+}
+
+static void
+shade_quads(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
+{
+ struct quad_shade_stage *qss = quad_shade_stage( qs );
+ struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+ struct tgsi_exec_machine *machine = qss->machine;
+
+ unsigned i, pass = 0;
+
+ machine->Consts = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
+ machine->InterpCoefs = quads[0]->coef;
+
+ for (i = 0; i < nr; i++) {
+ if (shade_quad(qs, quads[i]))
+ quads[pass++] = quads[i];
}
+
+ if (pass)
+ qs->next->run(qs->next, quads, pass);
}
+
+
+
/**
@@ -174,7 +194,7 @@ lp_quad_shade_stage( struct llvmpipe_context *llvmpipe )
qss->stage.llvmpipe = llvmpipe;
qss->stage.begin = shade_begin;
- qss->stage.run = shade_quad;
+ qss->stage.run = shade_quads;
qss->stage.destroy = shade_destroy;
qss->machine = tgsi_exec_machine_create();
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_occlusion.c b/src/gallium/drivers/llvmpipe/lp_quad_occlusion.c
index 6441ca30f2..c4d5b86d42 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_occlusion.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_occlusion.c
@@ -50,13 +50,15 @@ static unsigned count_bits( unsigned val )
}
static void
-occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad)
+occlusion_count_quads(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+ unsigned i;
- llvmpipe->occlusion_count += count_bits(quad->inout.mask);
+ for (i = 0; i < nr; i++)
+ llvmpipe->occlusion_count += count_bits(quads[i]->inout.mask);
- qs->next->run(qs->next, quad);
+ qs->next->run(qs->next, quads, nr);
}
@@ -78,7 +80,7 @@ struct quad_stage *lp_quad_occlusion_stage( struct llvmpipe_context *llvmpipe )
stage->llvmpipe = llvmpipe;
stage->begin = occlusion_begin;
- stage->run = occlusion_count_quad;
+ stage->run = occlusion_count_quads;
stage->destroy = occlusion_destroy;
return stage;
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_output.c b/src/gallium/drivers/llvmpipe/lp_quad_output.c
index d344b4e3a7..07cc840848 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_output.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_output.c
@@ -38,11 +38,8 @@
* taking mask into account.
*/
static void
-output_quad(struct quad_stage *qs, struct quad_header *quad)
+output_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
{
- /* in-tile pos: */
- const int itx = quad->input.x0 % TILE_SIZE;
- const int ity = quad->input.y0 % TILE_SIZE;
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
uint cbuf;
@@ -51,25 +48,35 @@ output_quad(struct quad_stage *qs, struct quad_header *quad)
for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
struct llvmpipe_cached_tile *tile
= lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
- quad->input.x0, quad->input.y0);
- float (*quadColor)[4] = quad->output.color[cbuf];
- int i, j;
+ quads[0]->input.x0,
+ quads[0]->input.y0);
+ int i, j, q;
/* get/swizzle dest colors */
- for (j = 0; j < QUAD_SIZE; j++) {
- if (quad->inout.mask & (1 << j)) {
- int x = itx + (j & 1);
- int y = ity + (j >> 1);
- for (i = 0; i < 4; i++) { /* loop over color chans */
- tile->data.color[y][x][i] = quadColor[i][j];
- }
- if (0) {
- debug_printf("lp write pixel %d,%d: %g, %g, %g\n",
- quad->input.x0 + x,
- quad->input.y0 + y,
- quadColor[0][j],
- quadColor[1][j],
- quadColor[2][j]);
+ for (q = 0; q < nr; q++) {
+ struct quad_header *quad = quads[q];
+ float (*quadColor)[4] = quad->output.color[cbuf];
+
+ /* in-tile pos: */
+ const int itx = quad->input.x0 % TILE_SIZE;
+ const int ity = quad->input.y0 % TILE_SIZE;
+
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (quad->inout.mask & (1 << j)) {
+ int x = itx + (j & 1);
+ int y = ity + (j >> 1);
+ for (i = 0; i < 4; i++) { /* loop over color chans */
+ tile->data.color[y][x][i] = quadColor[i][j];
+ }
+ if (0) {
+ debug_printf("lp write pixel %d,%d: %g, %g, %g\n",
+ quad->input.x0 + x,
+ quad->input.y0 + y,
+ quadColor[0][j],
+ quadColor[1][j],
+ quadColor[2][j]);
+ }
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c b/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
index d738d08d9e..60ec31eaf3 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
@@ -55,50 +55,52 @@ void
lp_build_quad_pipeline(struct llvmpipe_context *lp)
{
boolean early_depth_test =
- lp->depth_stencil->depth.enabled &&
- lp->framebuffer.zsbuf &&
- !lp->depth_stencil->alpha.enabled &&
- !lp->fs->info.uses_kill &&
- !lp->fs->info.writes_z;
+ lp->depth_stencil->depth.enabled &&
+ lp->framebuffer.zsbuf &&
+ !lp->depth_stencil->alpha.enabled &&
+ !lp->fs->info.uses_kill &&
+ !lp->fs->info.writes_z;
/* build up the pipeline in reverse order... */
- lp->quad.first = lp->quad.output;
-
- if (lp->blend->colormask != 0xf) {
- lp_push_quad_first( lp, lp->quad.colormask );
- }
-
- if (lp->blend->blend_enable ||
- lp->blend->logicop_enable) {
- lp_push_quad_first( lp, lp->quad.blend );
- }
-
- if (lp->active_query_count) {
- lp_push_quad_first( lp, lp->quad.occlusion );
- }
-
- if (lp->rasterizer->poly_smooth ||
- lp->rasterizer->line_smooth ||
- lp->rasterizer->point_smooth) {
- lp_push_quad_first( lp, lp->quad.coverage );
- }
-
- if (!early_depth_test) {
- lp_build_depth_stencil( lp );
- }
-
- if (lp->depth_stencil->alpha.enabled) {
- lp_push_quad_first( lp, lp->quad.alpha_test );
- }
-
- /* XXX always enable shader? */
- if (1) {
- lp_push_quad_first( lp, lp->quad.shade );
- }
-
- if (early_depth_test) {
- lp_build_depth_stencil( lp );
- lp_push_quad_first( lp, lp->quad.earlyz );
- }
+
+ /* Color combine
+ */
+ lp->quad.first = lp->quad.output;
+
+ if (lp->blend->colormask != 0xf) {
+ lp_push_quad_first( lp, lp->quad.colormask );
+ }
+
+ if (lp->blend->blend_enable ||
+ lp->blend->logicop_enable) {
+ lp_push_quad_first( lp, lp->quad.blend );
+ }
+
+ if (lp->rasterizer->poly_smooth ||
+ lp->rasterizer->line_smooth ||
+ lp->rasterizer->point_smooth) {
+ lp_push_quad_first( lp, lp->quad.coverage );
+ }
+
+ /* Shade/Depth/Stencil/Alpha
+ */
+ if (lp->active_query_count) {
+ lp_push_quad_first( lp, lp->quad.occlusion );
+ }
+
+ if (!early_depth_test) {
+ lp_build_depth_stencil( lp );
+ }
+
+ if (lp->depth_stencil->alpha.enabled) {
+ lp_push_quad_first( lp, lp->quad.alpha_test );
+ }
+
+ lp_push_quad_first( lp, lp->quad.shade );
+
+ if (early_depth_test) {
+ lp_build_depth_stencil( lp );
+ lp_push_quad_first( lp, lp->quad.earlyz );
+ }
}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_pipe.h b/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
index 4c3efdee69..5c8c7b3a73 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
+++ b/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
@@ -49,7 +49,7 @@ struct quad_stage {
void (*begin)(struct quad_stage *qs);
/** the stage action */
- void (*run)(struct quad_stage *qs, struct quad_header *quad);
+ void (*run)(struct quad_stage *qs, struct quad_header *quad[], unsigned nr);
void (*destroy)(struct quad_stage *qs);
};
@@ -69,6 +69,6 @@ struct quad_stage *lp_quad_output_stage( struct llvmpipe_context *llvmpipe );
void lp_build_quad_pipeline(struct llvmpipe_context *lp);
-void lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad);
+boolean lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad);
#endif /* LP_QUAD_PIPE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_stencil.c b/src/gallium/drivers/llvmpipe/lp_quad_stencil.c
index 229f0d054d..0acfa7cb68 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_stencil.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_stencil.c
@@ -198,7 +198,8 @@ apply_stencil_op(ubyte stencilVals[QUAD_SIZE],
* depth testing.
*/
static void
-stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
+stencil_test_quad(struct quad_stage *qs, struct quad_header *quads[],
+ unsigned nr)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
struct pipe_surface *ps = llvmpipe->framebuffer.zsbuf;
@@ -206,9 +207,12 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
ubyte ref, wrtMask, valMask;
ubyte stencilVals[QUAD_SIZE];
struct llvmpipe_cached_tile *tile
- = lp_get_cached_tile(llvmpipe->zsbuf_cache, quad->input.x0, quad->input.y0);
- uint j;
- uint face = quad->input.facing;
+ = lp_get_cached_tile(llvmpipe->zsbuf_cache,
+ quads[0]->input.x0,
+ quads[0]->input.y0);
+ uint face = quads[0]->input.facing;
+ uint pass = 0;
+ uint j, q;
if (!llvmpipe->depth_stencil->stencil[1].enabled) {
/* single-sided stencil test, use front (face=0) state */
@@ -227,103 +231,110 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
assert(ps); /* shouldn't get here if there's no stencil buffer */
- /* get stencil values from cached tile */
- switch (ps->format) {
- case PIPE_FORMAT_S8Z24_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- stencilVals[j] = tile->data.depth32[y][x] >> 24;
- }
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- stencilVals[j] = tile->data.depth32[y][x] & 0xff;
- }
- break;
- case PIPE_FORMAT_S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- stencilVals[j] = tile->data.stencil8[y][x];
+ for (q = 0; q < nr; q++) {
+ struct quad_header *quad = quads[q];
+
+ /* get stencil values from cached tile */
+ switch (ps->format) {
+ case PIPE_FORMAT_S8Z24_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ stencilVals[j] = tile->data.depth32[y][x] >> 24;
+ }
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ stencilVals[j] = tile->data.depth32[y][x] & 0xff;
+ }
+ break;
+ case PIPE_FORMAT_S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ stencilVals[j] = tile->data.stencil8[y][x];
+ }
+ break;
+ default:
+ assert(0);
}
- break;
- default:
- assert(0);
- }
- /* do the stencil test first */
- {
- unsigned passMask, failMask;
- passMask = do_stencil_test(stencilVals, func, ref, valMask);
- failMask = quad->inout.mask & ~passMask;
- quad->inout.mask &= passMask;
+ /* do the stencil test first */
+ {
+ unsigned passMask, failMask;
+ passMask = do_stencil_test(stencilVals, func, ref, valMask);
+ failMask = quad->inout.mask & ~passMask;
+ quad->inout.mask &= passMask;
- if (failOp != PIPE_STENCIL_OP_KEEP) {
- apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask);
+ if (failOp != PIPE_STENCIL_OP_KEEP) {
+ apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask);
+ }
}
- }
- if (quad->inout.mask) {
- /* now the pixels that passed the stencil test are depth tested */
- if (llvmpipe->depth_stencil->depth.enabled) {
- const unsigned origMask = quad->inout.mask;
+ if (quad->inout.mask) {
+ /* now the pixels that passed the stencil test are depth tested */
+ if (llvmpipe->depth_stencil->depth.enabled) {
+ const unsigned origMask = quad->inout.mask;
- lp_depth_test_quad(qs, quad); /* quad->mask is updated */
+ lp_depth_test_quad(qs, quad); /* quad->mask is updated */
- /* update stencil buffer values according to z pass/fail result */
- if (zFailOp != PIPE_STENCIL_OP_KEEP) {
- const unsigned failMask = origMask & ~quad->inout.mask;
- apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask);
- }
+ /* update stencil buffer values according to z pass/fail result */
+ if (zFailOp != PIPE_STENCIL_OP_KEEP) {
+ const unsigned failMask = origMask & ~quad->inout.mask;
+ apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask);
+ }
- if (zPassOp != PIPE_STENCIL_OP_KEEP) {
- const unsigned passMask = origMask & quad->inout.mask;
- apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask);
+ if (zPassOp != PIPE_STENCIL_OP_KEEP) {
+ const unsigned passMask = origMask & quad->inout.mask;
+ apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask);
+ }
+ }
+ else {
+ /* no depth test, apply Zpass operator to stencil buffer values */
+ apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask);
}
- }
- else {
- /* no depth test, apply Zpass operator to stencil buffer values */
- apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask);
- }
-
- }
- /* put new stencil values into cached tile */
- switch (ps->format) {
- case PIPE_FORMAT_S8Z24_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- uint s8z24 = tile->data.depth32[y][x];
- s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff);
- tile->data.depth32[y][x] = s8z24;
}
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- uint z24s8 = tile->data.depth32[y][x];
- z24s8 = (z24s8 & 0xffffff00) | stencilVals[j];
- tile->data.depth32[y][x] = z24s8;
- }
- break;
- case PIPE_FORMAT_S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->input.x0 % TILE_SIZE + (j & 1);
- int y = quad->input.y0 % TILE_SIZE + (j >> 1);
- tile->data.stencil8[y][x] = stencilVals[j];
+
+ /* put new stencil values into cached tile */
+ switch (ps->format) {
+ case PIPE_FORMAT_S8Z24_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ uint s8z24 = tile->data.depth32[y][x];
+ s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff);
+ tile->data.depth32[y][x] = s8z24;
+ }
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ uint z24s8 = tile->data.depth32[y][x];
+ z24s8 = (z24s8 & 0xffffff00) | stencilVals[j];
+ tile->data.depth32[y][x] = z24s8;
+ }
+ break;
+ case PIPE_FORMAT_S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.stencil8[y][x] = stencilVals[j];
+ }
+ break;
+ default:
+ assert(0);
}
- break;
- default:
- assert(0);
+
+ if (quad->inout.mask)
+ quads[pass++] = quad; /* keep the surviving quad pointer (not the loop index q) */
}
- if (quad->inout.mask)
- qs->next->run(qs->next, quad);
+ if (pass)
+ qs->next->run(qs->next, quads, pass);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_stipple.c b/src/gallium/drivers/llvmpipe/lp_quad_stipple.c
index 616394619a..429a218540 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_stipple.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_stipple.c
@@ -14,40 +14,50 @@
* Apply polygon stipple to quads produced by triangle rasterization
*/
static void
-stipple_quad(struct quad_stage *qs, struct quad_header *quad)
+stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
{
static const uint bit31 = 1 << 31;
static const uint bit30 = 1 << 30;
+ /* non-triangle prims skip the stipple test, so by default every quad passes */
+ unsigned pass = nr;
- if (quad->input.prim == QUAD_PRIM_TRI) {
+ if (quads[0]->input.prim == QUAD_PRIM_TRI) {
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
- /* need to invert Y to index into OpenGL's stipple pattern */
- const int col0 = quad->input.x0 % 32;
- const int y0 = quad->input.y0;
- const int y1 = y0 + 1;
- const uint stipple0 = llvmpipe->poly_stipple.stipple[y0 % 32];
- const uint stipple1 = llvmpipe->poly_stipple.stipple[y1 % 32];
+ unsigned q;
- /* turn off quad mask bits that fail the stipple test */
- if ((stipple0 & (bit31 >> col0)) == 0)
- quad->inout.mask &= ~MASK_TOP_LEFT;
+ pass = 0;
- if ((stipple0 & (bit30 >> col0)) == 0)
- quad->inout.mask &= ~MASK_TOP_RIGHT;
+ for (q = 0; q < nr; q++) {
+ struct quad_header *quad = quads[q];
- if ((stipple1 & (bit31 >> col0)) == 0)
- quad->inout.mask &= ~MASK_BOTTOM_LEFT;
+ const int col0 = quad->input.x0 % 32;
+ const int y0 = quad->input.y0;
+ const int y1 = y0 + 1;
+ const uint stipple0 = llvmpipe->poly_stipple.stipple[y0 % 32];
+ const uint stipple1 = llvmpipe->poly_stipple.stipple[y1 % 32];
- if ((stipple1 & (bit30 >> col0)) == 0)
- quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
+ /* turn off quad mask bits that fail the stipple test */
+ if ((stipple0 & (bit31 >> col0)) == 0)
+ quad->inout.mask &= ~MASK_TOP_LEFT;
- if (!quad->inout.mask) {
- /* all fragments failed stipple test, end of quad pipeline */
- return;
+ if ((stipple0 & (bit30 >> col0)) == 0)
+ quad->inout.mask &= ~MASK_TOP_RIGHT;
+
+ if ((stipple1 & (bit31 >> col0)) == 0)
+ quad->inout.mask &= ~MASK_BOTTOM_LEFT;
+
+ if ((stipple1 & (bit30 >> col0)) == 0)
+ quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
+
+ /* compact the quads that still have live fragments to the front */
+ if (quad->inout.mask)
+ quads[pass++] = quad;
}
}
- qs->next->run(qs->next, quad);
+ /* if every quad failed the stipple test, this is the end of the quad pipeline */
+ if (pass)
+ qs->next->run(qs->next, quads, pass);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 783f36bc7f..9a15a0d32b 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -172,7 +172,7 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
if (quad->inout.mask) {
struct llvmpipe_context *lp = setup->llvmpipe;
- lp->quad.first->run( lp->quad.first, quad );
+ lp->quad.first->run( lp->quad.first, &quad, 1 );
}
}
@@ -193,7 +193,7 @@ emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
if (mask & 4) setup->numFragsEmitted++;
if (mask & 8) setup->numFragsEmitted++;
#endif
- lp->quad.first->run( lp->quad.first, quad );
+ lp->quad.first->run( lp->quad.first, &quad, 1 );
#if DEBUG_FRAGS
mask = quad->inout.mask;
if (mask & 1) setup->numFragsWritten++;