From aa5db684382bd8662a83ca09ed000e4a5a1013f9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 16 Jul 2009 14:14:32 +0100 Subject: softpipe: remove backwards dependency from tilecache to softpipe The tile cache is a utility, it shouldn't know anything about the entity which is making use of it (ie softpipe). Remove softpipe parameter to all the tilecache function calls, and also remove the need to keep a softpipe pointer in the sampler structs. --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/softpipe/sp_quad_depth_test.c') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index d463930bae..768b9275b3 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -60,7 +60,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) unsigned zmask = 0; unsigned j; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + = sp_get_cached_tile(softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); assert(ps); /* shouldn't get here if there's no zbuffer */ -- cgit v1.2.3 From ab9fb5167023a26566b53e98f206dd73a18000f3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 24 Jul 2009 16:49:35 +0100 Subject: softpipe: expand quad pipeline to process >1 quad at a time This is part one -- we still only pass a single quad down, but the code can now cope with more. The quads must all be from the same tile. --- src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 106 ++-- src/gallium/drivers/softpipe/sp_quad_blend.c | 730 +++++++++++----------- src/gallium/drivers/softpipe/sp_quad_colormask.c | 15 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 48 +- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 23 +- src/gallium/drivers/softpipe/sp_quad_earlyz.c | 28 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 40 +- src/gallium/drivers/softpipe/sp_quad_occlusion.c | 10 +- src/gallium/drivers/softpipe/sp_quad_output.c | 49 +- src/gallium/drivers/softpipe/sp_quad_pipe.c | 88 +-- src/gallium/drivers/softpipe/sp_quad_pipe.h | 4 +- src/gallium/drivers/softpipe/sp_quad_stencil.c | 185 +++--- src/gallium/drivers/softpipe/sp_quad_stipple.c | 48 +- src/gallium/drivers/softpipe/sp_setup.c | 4 +- 14 files changed, 745 insertions(+), 633 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad_depth_test.c') diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 0845bae0e6..3a282208b6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -9,76 +9,80 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" +#define ALPHATEST( FUNC, COMP ) \ + static void \ + alpha_test_quads_##FUNC( struct quad_stage *qs, \ + struct quad_header *quads[], \ + unsigned nr ) \ + { \ + const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \ + const uint cbuf = 0; /* only output[0].alpha is tested */ \ + unsigned pass_nr = 0; \ + unsigned i; \ + \ + for (i = 0; i < nr; i++) { \ + const float *aaaa = quads[i]->output.color[cbuf][3]; \ + unsigned passMask = 0; \ + \ + if (aaaa[0] COMP ref) passMask |= (1 << 0); \ + if (aaaa[1] COMP ref) passMask |= (1 << 1); \ + if (aaaa[2] COMP ref) passMask |= (1 << 2); \ + if (aaaa[3] COMP ref) passMask |= (1 << 3); \ + \ + quads[i]->inout.mask &= passMask; \ + \ + if (quads[i]->inout.mask) \ + quads[pass_nr++] = quads[i]; \ + } \ + \ + if (pass_nr) \ + qs->next->run(qs->next, quads, pass_nr); \ + } + + +ALPHATEST( LESS, < ) +ALPHATEST( EQUAL, == ) +ALPHATEST( LEQUAL, <= ) +ALPHATEST( GREATER, > ) +ALPHATEST( NOTEQUAL, != ) +ALPHATEST( GEQUAL, >= ) + +/* XXX: Incorporate into shader using KILP. + */ static void -alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) +alpha_test_quad(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { - struct softpipe_context *softpipe = qs->softpipe; - const float ref = softpipe->depth_stencil->alpha.ref_value; - unsigned passMask = 0x0, j; - const uint cbuf = 0; /* only output[0].alpha is tested */ - const float *aaaa = quad->output.color[cbuf][3]; - - switch (softpipe->depth_stencil->alpha.func) { - case PIPE_FUNC_NEVER: - break; + switch (qs->softpipe->depth_stencil->alpha.func) { case PIPE_FUNC_LESS: - /* - * If mask were an array [4] we could do this SIMD-style: - * passMask = (quad->outputs.color[0][3] <= vec4(ref)); - */ - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] < ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_LESS( qs, quads, nr ); break; case PIPE_FUNC_EQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] == ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_EQUAL( qs, quads, nr ); break; case PIPE_FUNC_LEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] <= ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_LEQUAL( qs, quads, nr ); break; case PIPE_FUNC_GREATER: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] > ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_GREATER( qs, quads, nr ); break; case PIPE_FUNC_NOTEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] != ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_NOTEQUAL( qs, quads, nr ); break; case PIPE_FUNC_GEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] >= ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_GEQUAL( qs, quads, nr ); break; case PIPE_FUNC_ALWAYS: - passMask = MASK_ALL; + assert(0); /* should be caught earlier */ + qs->next->run(qs->next, quads, nr); break; + case PIPE_FUNC_NEVER: default: - assert(0); + assert(0); /* should be caught earlier */ + return; } - - quad->inout.mask &= passMask; - - if (quad->inout.mask) - qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 04b5daf3a4..fdf1bb4552 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -117,10 +117,16 @@ do { \ static void -logicop_quad(struct quad_stage *qs, struct quad_header *quad) +logicop_quad(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; uint cbuf; + struct softpipe_cached_tile * + tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], + quads[0]->input.x0, + quads[0]->input.y0); /* loop over colorbuffer outputs */ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { @@ -129,165 +135,161 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) uint *src4 = (uint *) src; uint *dst4 = (uint *) dst; uint *res4 = (uint *) res; - struct softpipe_cached_tile * - tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + for (i = 0; i < nr; i++) { + struct quad_header *quad = quads[i]; + float (*quadColor)[4] = quad->output.color[cbuf]; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - } - /* convert to ubyte */ - for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ - dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ - dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ - dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ - dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ - - src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ - src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ - src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ - src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ - } + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ + dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ + dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ + dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + + src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ + src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ + src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ + src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ + } - switch (softpipe->blend->logicop_func) { - case PIPE_LOGICOP_CLEAR: - for (j = 0; j < 4; j++) - res4[j] = 0; - break; - case PIPE_LOGICOP_NOR: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] | dst4[j]); - break; - case PIPE_LOGICOP_AND_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_COPY_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j]; - break; - case PIPE_LOGICOP_AND_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & ~dst4[j]; - break; - case PIPE_LOGICOP_INVERT: - for (j = 0; j < 4; j++) - res4[j] = ~dst4[j]; - break; - case PIPE_LOGICOP_XOR: - for (j = 0; j < 4; j++) - res4[j] = dst4[j] ^ src4[j]; - break; - case PIPE_LOGICOP_NAND: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] & dst4[j]); - break; - case PIPE_LOGICOP_AND: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_EQUIV: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] ^ dst4[j]); - break; - case PIPE_LOGICOP_NOOP: - for (j = 0; j < 4; j++) - res4[j] = dst4[j]; - break; - case PIPE_LOGICOP_OR_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_COPY: - for (j = 0; j < 4; j++) - res4[j] = src4[j]; - break; - case PIPE_LOGICOP_OR_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | ~dst4[j]; - break; - case PIPE_LOGICOP_OR: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_SET: - for (j = 0; j < 4; j++) - res4[j] = ~0; - break; - default: - assert(0); - } + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = ~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } - for (j = 0; j < 4; j++) { - quadColor[j][0] = ubyte_to_float(res[j][0]); - quadColor[j][1] = ubyte_to_float(res[j][1]); - quadColor[j][2] = ubyte_to_float(res[j][2]); - quadColor[j][3] = ubyte_to_float(res[j][3]); + for (j = 0; j < 4; j++) { + quadColor[j][0] = ubyte_to_float(res[j][0]); + quadColor[j][1] = ubyte_to_float(res[j][1]); + quadColor[j][2] = ubyte_to_float(res[j][2]); + quadColor[j][3] = ubyte_to_float(res[j][3]); + } } } - - /* pass quad to next stage */ - qs->next->run(qs->next, quad); } - - static void -blend_quad(struct quad_stage *qs, struct quad_header *quad) +blend_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; - struct softpipe_context *softpipe = qs->softpipe; uint cbuf; - if (softpipe->blend->logicop_enable) { - logicop_quad(qs, quad); - return; - } - /* loop over colorbuffer outputs */ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; - uint i, j; - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + quads[0]->input.x0, + quads[0]->input.y0); + uint q, i, j; + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[cbuf]; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - } - /* - * Compute src/first term RGB - */ - switch (softpipe->blend->rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[0], quadColor[0]); /* R */ - VEC4_COPY(source[1], quadColor[1]); /* G */ - VEC4_COPY(source[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: { const float *alpha = quadColor[3]; VEC4_MUL(source[0], quadColor[0], alpha); /* R */ @@ -295,12 +297,12 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], alpha); /* B */ } break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: { const float *alpha = dest[3]; VEC4_MUL(source[0], quadColor[0], alpha); /* R */ @@ -308,7 +310,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], alpha); /* B */ } break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: { const float *alpha = quadColor[3]; float diff[4], temp[4]; @@ -319,7 +321,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], temp); /* B */ } break; - case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ @@ -330,7 +332,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], comp); /* B */ } break; - case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_CONST_ALPHA: { float alpha[4]; VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); @@ -339,18 +341,18 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], alpha); /* B */ } break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[0], zero); /* R */ - VEC4_COPY(source[1], zero); /* G */ - VEC4_COPY(source[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: { float inv_comp[4]; VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ @@ -361,7 +363,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ } break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float inv_alpha[4]; VEC4_SUB(inv_alpha, one, quadColor[3]); @@ -370,7 +372,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ } break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_alpha[4]; VEC4_SUB(inv_alpha, one, dest[3]); @@ -379,7 +381,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ } break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_COLOR: { float inv_comp[4]; VEC4_SUB(inv_comp, one, dest[0]); /* R */ @@ -390,7 +392,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ } break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: { float inv_comp[4]; /* R */ @@ -404,7 +406,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_comp); } break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_alpha[4]; VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); @@ -413,73 +415,73 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ } break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - assert(0); /* to do */ - break; - default: - assert(0); - } + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + assert(0); + } - /* - * Compute src/first term A - */ - switch (softpipe->blend->alpha_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: { const float *alpha = quadColor[3]; VEC4_MUL(source[3], quadColor[3], alpha); /* A */ } break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* multiply alpha by 1.0 */ - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* multiply alpha by 1.0 */ + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ VEC4_MUL(source[3], quadColor[3], comp); /* A */ } break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float inv_alpha[4]; VEC4_SUB(inv_alpha, one, quadColor[3]); VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ } break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_alpha[4]; VEC4_SUB(inv_alpha, one, dest[3]); VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ } break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; /* A */ @@ -487,42 +489,42 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[3], quadColor[3], inv_comp); } break; - default: - assert(0); - } + default: + assert(0); + } - /* - * Compute dest/second term RGB - */ - switch (softpipe->blend->rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ @@ -533,7 +535,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], comp); /* B */ } break; - case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ @@ -542,17 +544,17 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], comp); /* B */ } break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: { float inv_comp[4]; VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ @@ -563,7 +565,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ } break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); @@ -572,7 +574,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ } break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_comp[4]; VEC4_SUB(inv_comp, one, dest[3]); /* A */ @@ -581,7 +583,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ } break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_COLOR: { float inv_comp[4]; VEC4_SUB(inv_comp, one, dest[0]); /* R */ @@ -592,7 +594,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ } break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: { float inv_comp[4]; /* R */ @@ -606,7 +608,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], inv_comp); } break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); @@ -615,138 +617,154 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], inv_comp); } break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - default: - assert(0); - } + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + default: + assert(0); + } - /* - * Compute dest/second term A - */ - switch (softpipe->blend->alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ VEC4_MUL(dest[3], dest[3], comp); /* A */ } break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ } break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_comp[4]; VEC4_SUB(inv_comp, one, dest[3]); /* A */ VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ } break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); VEC4_MUL(dest[3], dest[3], inv_comp); } break; - default: - assert(0); - } + default: + assert(0); + } - /* - * Combine RGB terms - */ - switch (softpipe->blend->rgb_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ - break; - default: - assert(0); - } + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); + } - /* - * Combine A terms - */ - switch (softpipe->blend->alpha_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ - break; - default: - assert(0); + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + assert(0); + } } - } /* cbuf loop */ +} + + +static void +blend_quad(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + struct softpipe_context *softpipe = qs->softpipe; + + if (softpipe->blend->logicop_enable) { + logicop_quad(qs, quads, nr); + } + else if (softpipe->blend->blend_enable) { + blend_quads(qs, quads, nr ); + } /* pass blended quad to next stage */ - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, nr); } diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index 89efbe3b02..ac74287473 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -84,12 +84,23 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad) if (!(softpipe->blend->colormask & PIPE_MASK_A)) COPY_4V(quadColor[3], dest[3]); } +} + +static void +colormask_quads(struct quad_stage *qs, struct quad_header *quads[], + unsigned nr) +{ + unsigned i; + + for (i = 0; i < nr; i++) + colormask_quad(qs, quads[i]); /* pass quad to next stage */ - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, nr); } + static void colormask_begin(struct quad_stage *qs) { qs->next->begin(qs->next); @@ -108,7 +119,7 @@ struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = colormask_begin; - stage->run = colormask_quad; + stage->run = colormask_quads; stage->destroy = colormask_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index 4aeee85870..eda5ce8e63 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -42,33 +42,47 @@ /** * Multiply quad's alpha values by the fragment coverage. */ -static void +static INLINE void coverage_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - const uint prim = quad->input.prim; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { + float (*quadColor)[4] = quad->output.color[cbuf]; + unsigned j; + for (j = 0; j < QUAD_SIZE; j++) { + assert(quad->input.coverage[j] >= 0.0); + assert(quad->input.coverage[j] <= 1.0); + quadColor[3][j] *= quad->input.coverage[j]; + } + } +} + + +/* XXX: Incorporate into shader after alpha_test. + */ +static void +coverage_run(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + struct softpipe_context *softpipe = qs->softpipe; + const uint prim = quads[0]->input.prim; + unsigned i; if ((softpipe->rasterizer->poly_smooth && prim == QUAD_PRIM_TRI) || (softpipe->rasterizer->line_smooth && prim == QUAD_PRIM_LINE) || (softpipe->rasterizer->point_smooth && prim == QUAD_PRIM_POINT)) { - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float (*quadColor)[4] = quad->output.color[cbuf]; - unsigned j; - for (j = 0; j < QUAD_SIZE; j++) { - assert(quad->input.coverage[j] >= 0.0); - assert(quad->input.coverage[j] <= 1.0); - quadColor[3][j] *= quad->input.coverage[j]; - } - } + + for (i = 0; i < nr; i++) + coverage_quad( qs, quads[i] ); } - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, nr); } - static void coverage_begin(struct quad_stage *qs) { qs->next->begin(qs->next); @@ -87,7 +101,7 @@ struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = coverage_begin; - stage->run = coverage_quad; + stage->run = coverage_run; stage->destroy = coverage_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 768b9275b3..8f223a7eae 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -49,7 +49,7 @@ * Try to effectively do that with codegen... */ -void +boolean sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; @@ -193,6 +193,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) } quad->inout.mask &= zmask; + if (quad->inout.mask == 0) + return FALSE; if (softpipe->depth_stencil->depth.writemask) { @@ -252,16 +254,25 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } } + + return TRUE; } static void -depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +depth_test_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { - sp_depth_test_quad(qs, quad); + unsigned i, pass = 0; - if (quad->inout.mask) - qs->next->run(qs->next, quad); + for (i = 0; i < nr; i++) { + if (sp_depth_test_quad(qs, quads[i])) + quads[pass++] = quads[i]; + } + + if (pass) + qs->next->run(qs->next, quads, pass); } @@ -283,7 +294,7 @@ struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = depth_test_begin; - stage->run = depth_test_quad; + stage->run = depth_test_quads; stage->destroy = depth_test_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c index 496fd39ed1..1048d44984 100644 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -43,20 +43,26 @@ static void earlyz_quad( struct quad_stage *qs, - struct quad_header *quad ) + struct quad_header *quads[], + unsigned nr ) { - const float fx = (float) quad->input.x0; - const float fy = (float) quad->input.y0; - const float dzdx = quad->posCoef->dadx[2]; - const float dzdy = quad->posCoef->dady[2]; - const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + const float a0z = quads[0]->posCoef->a0[2]; + const float dzdx = quads[0]->posCoef->dadx[2]; + const float dzdy = quads[0]->posCoef->dady[2]; + unsigned i; - quad->output.depth[0] = z0; - quad->output.depth[1] = z0 + dzdx; - quad->output.depth[2] = z0 + dzdy; - quad->output.depth[3] = z0 + dzdx + dzdy; + for (i = 0; i < nr; i++) { + const float fx = (float) quads[i]->input.x0; + const float fy = (float) quads[i]->input.y0; + const float z0 = a0z + dzdx * fx + dzdy * fy; - qs->next->run( qs->next, quad ); + quads[i]->output.depth[0] = z0; + quads[i]->output.depth[1] = z0 + dzdx; + quads[i]->output.depth[2] = z0 + dzdy; + quads[i]->output.depth[3] = z0 + dzdx + dzdy; + } + + qs->next->run( qs->next, quads, nr ); } static void diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 28f8d1a60e..ea5ed3bbd0 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -68,21 +68,18 @@ quad_shade_stage(struct quad_stage *qs) /** * Execute fragment shader for the four fragments in the quad. */ -static void +static boolean shade_quad(struct quad_stage *qs, struct quad_header *quad) { struct quad_shade_stage *qss = quad_shade_stage( qs ); struct softpipe_context *softpipe = qs->softpipe; struct tgsi_exec_machine *machine = qss->machine; boolean z_written; - - /* Consts do not require 16 byte alignment. */ - machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; - - machine->InterpCoefs = quad->coef; /* run shader */ quad->inout.mask &= softpipe->fs->run( softpipe->fs, machine, quad ); + if (quad->inout.mask == 0) + return FALSE; /* store outputs */ z_written = FALSE; @@ -129,11 +126,34 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) quad->output.depth[3] = z0 + dzdx + dzdy; } - /* shader may cull fragments */ - if (quad->inout.mask) { - qs->next->run( qs->next, quad ); + return TRUE; +} + +static void +shade_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = qss->machine; + + unsigned i, pass = 0; + + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + machine->InterpCoefs = quads[0]->coef; + + for (i = 0; i < nr; i++) { + if (shade_quad(qs, quads[i])) + quads[pass++] = quads[i]; } + + if (pass) + qs->next->run(qs->next, quads, pass); } + + + /** @@ -174,7 +194,7 @@ sp_quad_shade_stage( struct softpipe_context *softpipe ) qss->stage.softpipe = softpipe; qss->stage.begin = shade_begin; - qss->stage.run = shade_quad; + qss->stage.run = shade_quads; qss->stage.destroy = shade_destroy; qss->machine = tgsi_exec_machine_create(); diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c index dfa7ff3b1d..4adeb16546 100644 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -50,13 +50,15 @@ static unsigned count_bits( unsigned val ) } static void -occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) +occlusion_count_quads(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; + unsigned i; - softpipe->occlusion_count += count_bits(quad->inout.mask); + for (i = 0; i < nr; i++) + softpipe->occlusion_count += count_bits(quads[i]->inout.mask); - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, nr); } @@ -78,7 +80,7 @@ struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = occlusion_begin; - stage->run = occlusion_count_quad; + stage->run = occlusion_count_quads; stage->destroy = occlusion_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index dd8f5377e9..79a222ff58 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -38,11 +38,8 @@ * taking mask into account. */ static void -output_quad(struct quad_stage *qs, struct quad_header *quad) +output_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - /* in-tile pos: */ - const int itx = quad->input.x0 % TILE_SIZE; - const int ity = quad->input.y0 % TILE_SIZE; struct softpipe_context *softpipe = qs->softpipe; uint cbuf; @@ -51,25 +48,35 @@ output_quad(struct quad_stage *qs, struct quad_header *quad) for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; - int i, j; + quads[0]->input.x0, + quads[0]->input.y0); + int i, j, q; /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; - } - if (0) { - debug_printf("sp write pixel %d,%d: %g, %g, %g\n", - quad->input.x0 + x, - quad->input.y0 + y, - quadColor[0][j], - quadColor[1][j], - quadColor[2][j]); + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[cbuf]; + + /* in-tile pos: */ + const int itx = quad->input.x0 % TILE_SIZE; + const int ity = quad->input.y0 % TILE_SIZE; + + + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + if (0) { + debug_printf("sp write pixel %d,%d: %g, %g, %g\n", + quad->input.x0 + x, + quad->input.y0 + y, + quadColor[0][j], + quadColor[1][j], + quadColor[2][j]); + } } } } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 3a3359d303..594fade455 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -55,50 +55,52 @@ void sp_build_quad_pipeline(struct softpipe_context *sp) { boolean early_depth_test = - sp->depth_stencil->depth.enabled && - sp->framebuffer.zsbuf && - !sp->depth_stencil->alpha.enabled && - !sp->fs->info.uses_kill && - !sp->fs->info.writes_z; + sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf && + !sp->depth_stencil->alpha.enabled && + !sp->fs->info.uses_kill && + !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ - sp->quad.first = sp->quad.output; - - if (sp->blend->colormask != 0xf) { - sp_push_quad_first( sp, sp->quad.colormask ); - } - - if (sp->blend->blend_enable || - sp->blend->logicop_enable) { - sp_push_quad_first( sp, sp->quad.blend ); - } - - if (sp->active_query_count) { - sp_push_quad_first( sp, sp->quad.occlusion ); - } - - if (sp->rasterizer->poly_smooth || - sp->rasterizer->line_smooth || - sp->rasterizer->point_smooth) { - sp_push_quad_first( sp, sp->quad.coverage ); - } - - if (!early_depth_test) { - sp_build_depth_stencil( sp ); - } - - if (sp->depth_stencil->alpha.enabled) { - sp_push_quad_first( sp, sp->quad.alpha_test ); - } - - /* XXX always enable shader? */ - if (1) { - sp_push_quad_first( sp, sp->quad.shade ); - } - - if (early_depth_test) { - sp_build_depth_stencil( sp ); - sp_push_quad_first( sp, sp->quad.earlyz ); - } + + /* Color combine + */ + sp->quad.first = sp->quad.output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad.colormask ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad.blend ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad.coverage ); + } + + /* Shade/Depth/Stencil/Alpha + */ + if (sp->active_query_count) { + sp_push_quad_first( sp, sp->quad.occlusion ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad.alpha_test ); + } + + sp_push_quad_first( sp, sp->quad.shade ); + + if (early_depth_test) { + sp_build_depth_stencil( sp ); + sp_push_quad_first( sp, sp->quad.earlyz ); + } } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.h b/src/gallium/drivers/softpipe/sp_quad_pipe.h index 0e40586ffc..add31ba705 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.h +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.h @@ -49,7 +49,7 @@ struct quad_stage { void (*begin)(struct quad_stage *qs); /** the stage action */ - void (*run)(struct quad_stage *qs, struct quad_header *quad); + void (*run)(struct quad_stage *qs, struct quad_header *quad[], unsigned nr); void (*destroy)(struct quad_stage *qs); }; @@ -69,6 +69,6 @@ struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); void sp_build_quad_pipeline(struct softpipe_context *sp); -void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); +boolean sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); #endif /* SP_QUAD_PIPE_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index 34a8d9e9f6..706dd2f756 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -198,7 +198,8 @@ apply_stencil_op(ubyte stencilVals[QUAD_SIZE], * depth testing. */ static void -stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) +stencil_test_quad(struct quad_stage *qs, struct quad_header *quads[], + unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; struct pipe_surface *ps = softpipe->framebuffer.zsbuf; @@ -206,9 +207,12 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) ubyte ref, wrtMask, valMask; ubyte stencilVals[QUAD_SIZE]; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); - uint j; - uint face = quad->input.facing; + = sp_get_cached_tile(softpipe->zsbuf_cache, + quads[0]->input.x0, + quads[0]->input.y0); + uint face = quads[0]->input.facing; + uint pass = 0; + uint j, q; if (!softpipe->depth_stencil->stencil[1].enabled) { /* single-sided stencil test, use front (face=0) state */ @@ -227,103 +231,110 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(ps); /* shouldn't get here if there's no stencil buffer */ - /* get stencil values from cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] >> 24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] & 0xff; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.stencil8[y][x]; + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + + /* get stencil values from cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] >> 24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] & 0xff; + } + break; + case PIPE_FORMAT_S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.stencil8[y][x]; + } + break; + default: + assert(0); } - break; - default: - assert(0); - } - /* do the stencil test first */ - { - unsigned passMask, failMask; - passMask = do_stencil_test(stencilVals, func, ref, valMask); - failMask = quad->inout.mask & ~passMask; - quad->inout.mask &= passMask; + /* do the stencil test first */ + { + unsigned passMask, failMask; + passMask = do_stencil_test(stencilVals, func, ref, valMask); + failMask = quad->inout.mask & ~passMask; + quad->inout.mask &= passMask; - if (failOp != PIPE_STENCIL_OP_KEEP) { - apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); + if (failOp != PIPE_STENCIL_OP_KEEP) { + apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); + } } - } - if (quad->inout.mask) { - /* now the pixels that passed the stencil test are depth tested */ - if (softpipe->depth_stencil->depth.enabled) { - const unsigned origMask = quad->inout.mask; + if (quad->inout.mask) { + /* now the pixels that passed the stencil test are depth tested */ + if (softpipe->depth_stencil->depth.enabled) { + const unsigned origMask = quad->inout.mask; - sp_depth_test_quad(qs, quad); /* quad->mask is updated */ + sp_depth_test_quad(qs, quad); /* quad->mask is updated */ - /* update stencil buffer values according to z pass/fail result */ - if (zFailOp != PIPE_STENCIL_OP_KEEP) { - const unsigned failMask = origMask & ~quad->inout.mask; - apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); - } + /* update stencil buffer values according to z pass/fail result */ + if (zFailOp != PIPE_STENCIL_OP_KEEP) { + const unsigned failMask = origMask & ~quad->inout.mask; + apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); + } - if (zPassOp != PIPE_STENCIL_OP_KEEP) { - const unsigned passMask = origMask & quad->inout.mask; - apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); + if (zPassOp != PIPE_STENCIL_OP_KEEP) { + const unsigned passMask = origMask & quad->inout.mask; + apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); + } + } + else { + /* no depth test, apply Zpass operator to stencil buffer values */ + apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); } - } - else { - /* no depth test, apply Zpass operator to stencil buffer values */ - apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); - } - - } - /* put new stencil values into cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint s8z24 = tile->data.depth32[y][x]; - s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); - tile->data.depth32[y][x] = s8z24; } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; - tile->data.depth32[y][x] = z24s8; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.stencil8[y][x] = stencilVals[j]; + + /* put new stencil values into cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint s8z24 = tile->data.depth32[y][x]; + s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); + tile->data.depth32[y][x] = s8z24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint z24s8 = tile->data.depth32[y][x]; + z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; + tile->data.depth32[y][x] = z24s8; + } + break; + case PIPE_FORMAT_S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.stencil8[y][x] = stencilVals[j]; + } + break; + default: + assert(0); } - break; - default: - assert(0); + + if (quad->inout.mask) + quads[pass++] = q; } - if (quad->inout.mask) - qs->next->run(qs->next, quad); + if (pass) + qs->next->run(qs->next, quads, pass); } diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index 07162db7b6..05665d8e26 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -14,40 +14,46 @@ * Apply polygon stipple to quads produced by triangle rasterization */ static void -stipple_quad(struct quad_stage *qs, struct quad_header *quad) +stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { static const uint bit31 = 1 << 31; static const uint bit30 = 1 << 30; + unsigned pass = nr; - if (quad->input.prim == QUAD_PRIM_TRI) { + if (quads[0]->input.prim == QUAD_PRIM_TRI) { struct softpipe_context *softpipe = qs->softpipe; - /* need to invert Y to index into OpenGL's stipple pattern */ - const int col0 = quad->input.x0 % 32; - const int y0 = quad->input.y0; - const int y1 = y0 + 1; - const uint stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; - const uint stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + unsigned q; - /* turn off quad mask bits that fail the stipple test */ - if ((stipple0 & (bit31 >> col0)) == 0) - quad->inout.mask &= ~MASK_TOP_LEFT; + pass = 0; - if ((stipple0 & (bit30 >> col0)) == 0) - quad->inout.mask &= ~MASK_TOP_RIGHT; + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; - if ((stipple1 & (bit31 >> col0)) == 0) - quad->inout.mask &= ~MASK_BOTTOM_LEFT; + const int col0 = quad->input.x0 % 32; + const int y0 = quad->input.y0; + const int y1 = y0 + 1; + const uint stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + const uint stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; - if ((stipple1 & (bit30 >> col0)) == 0) - quad->inout.mask &= ~MASK_BOTTOM_RIGHT; + /* turn off quad mask bits that fail the stipple test */ + if ((stipple0 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_LEFT; - if (!quad->inout.mask) { - /* all fragments failed stipple test, end of quad pipeline */ - return; + if ((stipple0 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_RIGHT; + + if ((stipple1 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_LEFT; + + if ((stipple1 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_RIGHT; + + if (quad->inout.mask) + quads[pass++] = quad; } } - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, pass); } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index eaf84ed9de..23dcae89c6 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -172,7 +172,7 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) if (quad->inout.mask) { struct softpipe_context *sp = setup->softpipe; - sp->quad.first->run( sp->quad.first, quad ); + sp->quad.first->run( sp->quad.first, &quad, 1 ); } } @@ -193,7 +193,7 @@ emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) if (mask & 4) setup->numFragsEmitted++; if (mask & 8) setup->numFragsEmitted++; #endif - sp->quad.first->run( sp->quad.first, quad ); + sp->quad.first->run( sp->quad.first, &quad, 1 ); #if DEBUG_FRAGS mask = quad->inout.mask; if (mask & 1) setup->numFragsWritten++; -- cgit v1.2.3 From bac8e34c9e4077d370923773d67fe565ce154849 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 27 Jul 2009 08:17:45 +0100 Subject: softpipe: move all depth/stencil/alpha pixel processing into one stage --- src/gallium/drivers/softpipe/Makefile | 5 - src/gallium/drivers/softpipe/sp_context.c | 12 - src/gallium/drivers/softpipe/sp_context.h | 6 - src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 112 ---- src/gallium/drivers/softpipe/sp_quad_coverage.c | 101 ---- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 659 ++++++++++++++++++---- src/gallium/drivers/softpipe/sp_quad_earlyz.c | 94 --- src/gallium/drivers/softpipe/sp_quad_fs.c | 46 +- src/gallium/drivers/softpipe/sp_quad_occlusion.c | 87 --- src/gallium/drivers/softpipe/sp_quad_pipe.c | 46 +- src/gallium/drivers/softpipe/sp_quad_pipe.h | 2 - src/gallium/drivers/softpipe/sp_quad_stencil.c | 363 ------------ src/gallium/drivers/softpipe/sp_state_derived.c | 4 +- 13 files changed, 580 insertions(+), 957 deletions(-) delete mode 100644 src/gallium/drivers/softpipe/sp_quad_alpha_test.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_coverage.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_earlyz.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_occlusion.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_stencil.c (limited to 'src/gallium/drivers/softpipe/sp_quad_depth_test.c') diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index bdc1a5819f..48522abe98 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -16,13 +16,8 @@ C_SOURCES = \ sp_prim_vbuf.c \ sp_quad_pipe.c \ sp_quad_stipple.c \ - sp_quad_earlyz.c \ sp_quad_depth_test.c \ - sp_quad_stencil.c \ sp_quad_fs.c \ - sp_quad_alpha_test.c \ - sp_quad_occlusion.c \ - sp_quad_coverage.c \ sp_quad_blend.c \ sp_screen.c \ sp_setup.c \ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 28a0dd62ac..e35c6b3aec 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -88,14 +88,8 @@ static void softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); - softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); - softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); - softpipe->quad.coverage->destroy( softpipe->quad.coverage ); softpipe->quad.blend->destroy( softpipe->quad.blend ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) @@ -230,14 +224,8 @@ softpipe_create( struct pipe_screen *screen ) /* setup quad rendering stages */ - softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); softpipe->quad.blend = sp_quad_blend_stage(softpipe); /* vertex shader samplers */ diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index b76ff610a3..fa3306c020 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -114,14 +114,8 @@ struct softpipe_context { /** Software quad rendering pipeline */ struct { - struct quad_stage *polygon_stipple; - struct quad_stage *earlyz; struct quad_stage *shade; - struct quad_stage *alpha_test; - struct quad_stage *stencil_test; struct quad_stage *depth_test; - struct quad_stage *occlusion; - struct quad_stage *coverage; struct quad_stage *blend; struct quad_stage *first; /**< points to one of the above stages */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c deleted file mode 100644 index 3a282208b6..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ /dev/null @@ -1,112 +0,0 @@ - -/** - * quad alpha test - */ - -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_quad_pipe.h" -#include "pipe/p_defines.h" -#include "util/u_memory.h" - -#define ALPHATEST( FUNC, COMP ) \ - static void \ - alpha_test_quads_##FUNC( struct quad_stage *qs, \ - struct quad_header *quads[], \ - unsigned nr ) \ - { \ - const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \ - const uint cbuf = 0; /* only output[0].alpha is tested */ \ - unsigned pass_nr = 0; \ - unsigned i; \ - \ - for (i = 0; i < nr; i++) { \ - const float *aaaa = quads[i]->output.color[cbuf][3]; \ - unsigned passMask = 0; \ - \ - if (aaaa[0] COMP ref) passMask |= (1 << 0); \ - if (aaaa[1] COMP ref) passMask |= (1 << 1); \ - if (aaaa[2] COMP ref) passMask |= (1 << 2); \ - if (aaaa[3] COMP ref) passMask |= (1 << 3); \ - \ - quads[i]->inout.mask &= passMask; \ - \ - if (quads[i]->inout.mask) \ - quads[pass_nr++] = quads[i]; \ - } \ - \ - if (pass_nr) \ - qs->next->run(qs->next, quads, pass_nr); \ - } - - -ALPHATEST( LESS, < ) -ALPHATEST( EQUAL, == ) -ALPHATEST( LEQUAL, <= ) -ALPHATEST( GREATER, > ) -ALPHATEST( NOTEQUAL, != ) -ALPHATEST( GEQUAL, >= ) - - -/* XXX: Incorporate into shader using KILP. - */ -static void -alpha_test_quad(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - switch (qs->softpipe->depth_stencil->alpha.func) { - case PIPE_FUNC_LESS: - alpha_test_quads_LESS( qs, quads, nr ); - break; - case PIPE_FUNC_EQUAL: - alpha_test_quads_EQUAL( qs, quads, nr ); - break; - case PIPE_FUNC_LEQUAL: - alpha_test_quads_LEQUAL( qs, quads, nr ); - break; - case PIPE_FUNC_GREATER: - alpha_test_quads_GREATER( qs, quads, nr ); - break; - case PIPE_FUNC_NOTEQUAL: - alpha_test_quads_NOTEQUAL( qs, quads, nr ); - break; - case PIPE_FUNC_GEQUAL: - alpha_test_quads_GEQUAL( qs, quads, nr ); - break; - case PIPE_FUNC_ALWAYS: - assert(0); /* should be caught earlier */ - qs->next->run(qs->next, quads, nr); - break; - case PIPE_FUNC_NEVER: - default: - assert(0); /* should be caught earlier */ - return; - } -} - - -static void alpha_test_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void alpha_test_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage * -sp_quad_alpha_test_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = alpha_test_begin; - stage->run = alpha_test_quad; - stage->destroy = alpha_test_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c deleted file mode 100644 index 989e997f81..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ /dev/null @@ -1,101 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * \brief Apply AA coverage to quad alpha valus - * \author Brian Paul - */ - - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_quad_pipe.h" - - -/** - * Multiply quad's alpha values by the fragment coverage. - */ -static INLINE void -coverage_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float (*quadColor)[4] = quad->output.color[cbuf]; - unsigned j; - for (j = 0; j < QUAD_SIZE; j++) { - assert(quad->input.coverage[j] >= 0.0); - assert(quad->input.coverage[j] <= 1.0); - quadColor[3][j] *= quad->input.coverage[j]; - } - } -} - - -/* XXX: Incorporate into shader after alpha_test. - */ -static void -coverage_run(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - unsigned i; - - for (i = 0; i < nr; i++) - coverage_quad( qs, quads[i] ); - - qs->next->run(qs->next, quads, nr); -} - -static void coverage_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void coverage_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = coverage_begin; - stage->run = coverage_run; - stage->destroy = coverage_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 8f223a7eae..bf65799a81 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -31,61 +31,109 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" +#include "tgsi/tgsi_scan.h" #include "sp_context.h" #include "sp_quad.h" #include "sp_surface.h" #include "sp_quad_pipe.h" #include "sp_tile_cache.h" +#include "sp_state.h" /* for sp_fragment_shader */ -/** - * Do depth testing for a quad. - * Not static since it's used by the stencil code. - */ +struct depth_data { + struct pipe_surface *ps; + enum pipe_format format; + unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ + unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ + ubyte stencilVals[QUAD_SIZE]; + struct softpipe_cached_tile *tile; +}; -/* - * To increase efficiency, we should probably have multiple versions - * of this function that are specifically for Z16, Z32 and FP Z buffers. - * Try to effectively do that with codegen... - */ -boolean -sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) + +static void +get_depth_stencil_values( struct depth_data *data, + const struct quad_header *quad ) { - struct softpipe_context *softpipe = qs->softpipe; - struct pipe_surface *ps = softpipe->framebuffer.zsbuf; - const enum pipe_format format = ps->format; - unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ - unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ - unsigned zmask = 0; unsigned j; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + const struct softpipe_cached_tile *tile = data->tile; + + switch (data->format) { + case PIPE_FORMAT_Z16_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth16[y][x]; + } + break; + case PIPE_FORMAT_Z32_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x]; + } + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; + data->stencilVals[j] = tile->data.depth32[y][x] >> 24; + } + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x] >> 8; + data->stencilVals[j] = tile->data.depth32[y][x] & 0xff; + } + break; + default: + assert(0); + } +} - assert(ps); /* shouldn't get here if there's no zbuffer */ +/* If the shader has not been run, interpolate the depth values + * ourselves. + */ +static void +interpolate_quad_depth( struct quad_header *quad ) +{ + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; +} - /* - * Convert quad's float depth values to int depth values (qzzzz). + +static void +convert_quad_depth( struct depth_data *data, + const struct quad_header *quad ) +{ + unsigned j; + + /* Convert quad's float depth values to int depth values (qzzzz). * If the Z buffer stores integer values, we _have_ to do the depth * compares with integers (not floats). Otherwise, the float->int->float * conversion of Z values (which isn't an identity function) will cause * Z-fighting errors. - * - * Also, get the zbuffer values (bzzzz) from the cached tile. */ - switch (format) { + switch (data->format) { case PIPE_FORMAT_Z16_UNORM: { float scale = 65535.0; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); - } - - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth16[y][x]; + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } } break; @@ -94,47 +142,247 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) double scale = (double) (uint) ~0UL; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); - } - - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth32[y][x]; + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } } break; case PIPE_FORMAT_X8Z24_UNORM: - /* fall-through */ case PIPE_FORMAT_S8Z24_UNORM: { float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); - } - - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } } break; case PIPE_FORMAT_Z24X8_UNORM: - /* fall-through */ case PIPE_FORMAT_Z24S8_UNORM: { float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } + } + break; + default: + assert(0); + } +} - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth32[y][x] >> 8; + + +static void +write_depth_stencil_values( struct depth_data *data, + struct quad_header *quad ) +{ + struct softpipe_cached_tile *tile = data->tile; + unsigned j; + + /* put updated Z values back into cached tile */ + switch (data->format) { + case PIPE_FORMAT_Z16_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth16[y][x] = (ushort) data->bzzzz[j]; + } + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z32_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = data->bzzzz[j]; + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j]; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j]; + } + break; + case PIPE_FORMAT_Z24X8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = data->bzzzz[j] << 8; + } + break; + default: + assert(0); + } +} + + + + +/** Only 8-bit stencil supported */ +#define STENCIL_MAX 0xff + + +/** + * Do the basic stencil test (compare stencil buffer values against the + * reference value. + * + * \param data->stencilVals the stencil values from the stencil buffer + * \param func the stencil func (PIPE_FUNC_x) + * \param ref the stencil reference value + * \param valMask the stencil value mask indicating which bits of the stencil + * values and ref value are to be used. + * \return mask indicating which pixels passed the stencil test + */ +static unsigned +do_stencil_test(struct depth_data *data, + unsigned func, + unsigned ref, unsigned valMask) +{ + unsigned passMask = 0x0; + unsigned j; + + ref &= valMask; + + switch (func) { + case PIPE_FUNC_NEVER: + /* passMask = 0x0 */ + break; + case PIPE_FUNC_LESS: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref < (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref == (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref <= (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref > (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref != (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref >= (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + assert(0); + } + + return passMask; +} + + +/** + * Apply the stencil operator to stencil values. + * + * \param data->stencilVals the stencil buffer values (read and written) + * \param mask indicates which pixels to update + * \param op the stencil operator (PIPE_STENCIL_OP_x) + * \param ref the stencil reference value + * \param wrtMask writemask controlling which bits are changed in the + * stencil values + */ +static void +apply_stencil_op(struct depth_data *data, + unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) +{ + unsigned j; + ubyte newstencil[QUAD_SIZE]; + + for (j = 0; j < QUAD_SIZE; j++) { + newstencil[j] = data->stencilVals[j]; + } + + switch (op) { + case PIPE_STENCIL_OP_KEEP: + /* no-op */ + break; + case PIPE_STENCIL_OP_ZERO: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = 0; + } + } + break; + case PIPE_STENCIL_OP_REPLACE: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ref; + } + } + break; + case PIPE_STENCIL_OP_INCR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (data->stencilVals[j] < STENCIL_MAX) { + newstencil[j] = data->stencilVals[j] + 1; + } + } + } + break; + case PIPE_STENCIL_OP_DECR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (data->stencilVals[j] > 0) { + newstencil[j] = data->stencilVals[j] - 1; + } + } + } + break; + case PIPE_STENCIL_OP_INCR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = data->stencilVals[j] + 1; + } + } + break; + case PIPE_STENCIL_OP_DECR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = data->stencilVals[j] - 1; + } + } + break; + case PIPE_STENCIL_OP_INVERT: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ~data->stencilVals[j]; } } break; @@ -142,6 +390,39 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } + /* + * update the stencil values + */ + if (wrtMask != STENCIL_MAX) { + /* apply bit-wise stencil buffer writemask */ + for (j = 0; j < QUAD_SIZE; j++) { + data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]); + } + } + else { + for (j = 0; j < QUAD_SIZE; j++) { + data->stencilVals[j] = newstencil[j]; + } + } +} + + + +/* + * To increase efficiency, we should probably have multiple versions + * of this function that are specifically for Z16, Z32 and FP Z buffers. + * Try to effectively do that with codegen... + */ + +static boolean +depth_test_quad(struct quad_stage *qs, + struct depth_data *data, + struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + unsigned zmask = 0; + unsigned j; + switch (softpipe->depth_stencil->depth.func) { case PIPE_FUNC_NEVER: /* zmask = 0 */ @@ -151,37 +432,37 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) * Like this: quad->mask &= (quad->outputs.depth < zzzz); */ for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] < bzzzz[j]) + if (data->qzzzz[j] < data->bzzzz[j]) zmask |= 1 << j; } break; case PIPE_FUNC_EQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] == bzzzz[j]) + if (data->qzzzz[j] == data->bzzzz[j]) zmask |= 1 << j; } break; case PIPE_FUNC_LEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] <= bzzzz[j]) + if (data->qzzzz[j] <= data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_GREATER: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] > bzzzz[j]) + if (data->qzzzz[j] > data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_NOTEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] != bzzzz[j]) + if (data->qzzzz[j] != data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_GEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] >= bzzzz[j]) + if (data->qzzzz[j] >= data->bzzzz[j]) zmask |= (1 << j); } break; @@ -196,83 +477,231 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) if (quad->inout.mask == 0) return FALSE; + /* Update our internal copy only if writemask set. Even if + * depth.writemask is FALSE, may still need to write out buffer + * data due to stencil changes. + */ if (softpipe->depth_stencil->depth.writemask) { - - /* This is also efficient with sse / spe instructions: - */ for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - bzzzz[j] = qzzzz[j]; - } + if (quad->inout.mask & (1 << j)) { + data->bzzzz[j] = data->qzzzz[j]; + } } + } - /* put updated Z values back into cached tile */ - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.depth16[y][x] = (ushort) bzzzz[j]; - } - break; - case PIPE_FORMAT_X8Z24_UNORM: - /* fall-through */ - /* (yes, this falls through to a different case than above) */ - case PIPE_FORMAT_Z32_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.depth32[y][x] = bzzzz[j]; - } - break; - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint s8z24 = tile->data.depth32[y][x]; - s8z24 = (s8z24 & 0xff000000) | bzzzz[j]; - tile->data.depth32[y][x] = s8z24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8); - tile->data.depth32[y][x] = z24s8; + return TRUE; +} + + + +/** + * Do stencil (and depth) testing. Stenciling depends on the outcome of + * depth testing. + */ +static boolean +depth_stencil_test_quad(struct quad_stage *qs, + struct depth_data *data, + struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + unsigned func, zFailOp, zPassOp, failOp; + ubyte ref, wrtMask, valMask; + uint face = quad->input.facing; + + if (!softpipe->depth_stencil->stencil[1].enabled) { + /* single-sided stencil test, use front (face=0) state */ + face = 0; + } + + /* choose front or back face function, operator, etc */ + /* XXX we could do these initializations once per primitive */ + func = softpipe->depth_stencil->stencil[face].func; + failOp = softpipe->depth_stencil->stencil[face].fail_op; + zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; + zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; + ref = softpipe->depth_stencil->stencil[face].ref_value; + wrtMask = softpipe->depth_stencil->stencil[face].writemask; + valMask = softpipe->depth_stencil->stencil[face].valuemask; + + + /* do the stencil test first */ + { + unsigned passMask, failMask; + passMask = do_stencil_test(data, func, ref, valMask); + failMask = quad->inout.mask & ~passMask; + quad->inout.mask &= passMask; + + if (failOp != PIPE_STENCIL_OP_KEEP) { + apply_stencil_op(data, failMask, failOp, ref, wrtMask); + } + } + + if (quad->inout.mask) { + /* now the pixels that passed the stencil test are depth tested */ + if (softpipe->depth_stencil->depth.enabled) { + const unsigned origMask = quad->inout.mask; + + depth_test_quad(qs, data, quad); /* quad->mask is updated */ + + /* update stencil buffer values according to z pass/fail result */ + if (zFailOp != PIPE_STENCIL_OP_KEEP) { + const unsigned failMask = origMask & ~quad->inout.mask; + apply_stencil_op(data, failMask, zFailOp, ref, wrtMask); } - break; - case PIPE_FORMAT_Z24X8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.depth32[y][x] = bzzzz[j] << 8; + + if (zPassOp != PIPE_STENCIL_OP_KEEP) { + const unsigned passMask = origMask & quad->inout.mask; + apply_stencil_op(data, passMask, zPassOp, ref, wrtMask); } - break; - default: - assert(0); + } + else { + /* no depth test, apply Zpass operator to stencil buffer values */ + apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask); } } - return TRUE; + return quad->inout.mask != 0; } +#define ALPHATEST( FUNC, COMP ) \ + static int \ + alpha_test_quads_##FUNC( struct quad_stage *qs, \ + struct quad_header *quads[], \ + unsigned nr ) \ + { \ + const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \ + const uint cbuf = 0; /* only output[0].alpha is tested */ \ + unsigned pass_nr = 0; \ + unsigned i; \ + \ + for (i = 0; i < nr; i++) { \ + const float *aaaa = quads[i]->output.color[cbuf][3]; \ + unsigned passMask = 0; \ + \ + if (aaaa[0] COMP ref) passMask |= (1 << 0); \ + if (aaaa[1] COMP ref) passMask |= (1 << 1); \ + if (aaaa[2] COMP ref) passMask |= (1 << 2); \ + if (aaaa[3] COMP ref) passMask |= (1 << 3); \ + \ + quads[i]->inout.mask &= passMask; \ + \ + if (quads[i]->inout.mask) \ + quads[pass_nr++] = quads[i]; \ + } \ + \ + return pass_nr; \ + } + + +ALPHATEST( LESS, < ) +ALPHATEST( EQUAL, == ) +ALPHATEST( LEQUAL, <= ) +ALPHATEST( GREATER, > ) +ALPHATEST( NOTEQUAL, != ) +ALPHATEST( GEQUAL, >= ) + + +/* XXX: Incorporate into shader using KILP. + */ +static int +alpha_test_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + switch (qs->softpipe->depth_stencil->alpha.func) { + case PIPE_FUNC_LESS: + return alpha_test_quads_LESS( qs, quads, nr ); + case PIPE_FUNC_EQUAL: + return alpha_test_quads_EQUAL( qs, quads, nr ); + break; + case PIPE_FUNC_LEQUAL: + return alpha_test_quads_LEQUAL( qs, quads, nr ); + case PIPE_FUNC_GREATER: + return alpha_test_quads_GREATER( qs, quads, nr ); + case PIPE_FUNC_NOTEQUAL: + return alpha_test_quads_NOTEQUAL( qs, quads, nr ); + case PIPE_FUNC_GEQUAL: + return alpha_test_quads_GEQUAL( qs, quads, nr ); + case PIPE_FUNC_ALWAYS: + return nr; + case PIPE_FUNC_NEVER: + default: + return 0; + } +} + +static unsigned mask_count[0x8] = +{ + 0, /* 0x0 */ + 1, /* 0x1 */ + 1, /* 0x2 */ + 2, /* 0x3 */ + 1, /* 0x4 */ + 2, /* 0x5 */ + 2, /* 0x6 */ + 3, /* 0x7 */ +}; + + + static void depth_test_quads(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { unsigned i, pass = 0; + const struct sp_fragment_shader *fs = qs->softpipe->fs; + boolean interp_depth = !fs->info.writes_z; + struct depth_data data; - for (i = 0; i < nr; i++) { - if (sp_depth_test_quad(qs, quads[i])) + + if (qs->softpipe->depth_stencil->alpha.enabled) { + nr = alpha_test_quads(qs, quads, nr); + } + + if (qs->softpipe->framebuffer.zsbuf && + (qs->softpipe->depth_stencil->depth.enabled || + qs->softpipe->depth_stencil->stencil[0].enabled)) { + + data.ps = qs->softpipe->framebuffer.zsbuf; + data.format = data.ps->format; + data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, + quads[0]->input.x0, + quads[0]->input.y0); + + for (i = 0; i < nr; i++) { + get_depth_stencil_values(&data, quads[i]); + + if (qs->softpipe->depth_stencil->depth.enabled) { + if (interp_depth) + interpolate_quad_depth(quads[i]); + + convert_quad_depth(&data, quads[i]); + } + + if (qs->softpipe->depth_stencil->stencil[0].enabled) { + if (!depth_stencil_test_quad(qs, &data, quads[i])) + continue; + } + else { + if (!depth_test_quad(qs, &data, quads[i])) + continue; + } + + if (qs->softpipe->depth_stencil->stencil[0].enabled || + qs->softpipe->depth_stencil->depth.writemask) + write_depth_stencil_values(&data, quads[i]); + + qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask]; quads[pass++] = quads[i]; + } + + nr = pass; } - - if (pass) - qs->next->run(qs->next, quads, pass); + + if (nr) + qs->next->run(qs->next, quads, nr); } diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c deleted file mode 100644 index 1048d44984..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ /dev/null @@ -1,94 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief Quad early-z testing - */ - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "sp_quad.h" -#include "sp_quad_pipe.h" - - -/** - * All this stage does is compute the quad's Z values (which is normally - * done by the shading stage). - * The next stage will do the actual depth test. - */ -static void -earlyz_quad( - struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr ) -{ - const float a0z = quads[0]->posCoef->a0[2]; - const float dzdx = quads[0]->posCoef->dadx[2]; - const float dzdy = quads[0]->posCoef->dady[2]; - unsigned i; - - for (i = 0; i < nr; i++) { - const float fx = (float) quads[i]->input.x0; - const float fy = (float) quads[i]->input.y0; - const float z0 = a0z + dzdx * fx + dzdy * fy; - - quads[i]->output.depth[0] = z0; - quads[i]->output.depth[1] = z0 + dzdx; - quads[i]->output.depth[2] = z0 + dzdy; - quads[i]->output.depth[3] = z0 + dzdx + dzdy; - } - - qs->next->run( qs->next, quads, nr ); -} - -static void -earlyz_begin( - struct quad_stage *qs ) -{ - qs->next->begin( qs->next ); -} - -static void -earlyz_destroy( - struct quad_stage *qs ) -{ - FREE( qs ); -} - -struct quad_stage * -sp_quad_earlyz_stage( - struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT( quad_stage ); - - stage->softpipe = softpipe; - stage->begin = earlyz_begin; - stage->run = earlyz_quad; - stage->destroy = earlyz_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index ea5ed3bbd0..56a8f55d77 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -111,24 +111,31 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) } } - if (!z_written) { - /* compute Z values now, as in the quad earlyz stage */ - /* XXX we should really only do this if the earlyz stage is not used */ - const float fx = (float) quad->input.x0; - const float fy = (float) quad->input.y0; - const float dzdx = quad->posCoef->dadx[2]; - const float dzdy = quad->posCoef->dady[2]; - const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - - quad->output.depth[0] = z0; - quad->output.depth[1] = z0 + dzdx; - quad->output.depth[2] = z0 + dzdy; - quad->output.depth[3] = z0 + dzdx + dzdy; - } - return TRUE; } + + +static void +coverage_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { + float (*quadColor)[4] = quad->output.color[cbuf]; + unsigned j; + for (j = 0; j < QUAD_SIZE; j++) { + assert(quad->input.coverage[j] >= 0.0); + assert(quad->input.coverage[j] <= 1.0); + quadColor[3][j] *= quad->input.coverage[j]; + } + } +} + + + static void shade_quads(struct quad_stage *qs, struct quad_header *quads[], @@ -144,8 +151,13 @@ shade_quads(struct quad_stage *qs, machine->InterpCoefs = quads[0]->coef; for (i = 0; i < nr; i++) { - if (shade_quad(qs, quads[i])) - quads[pass++] = quads[i]; + if (!shade_quad(qs, quads[i])) + continue; + + if (/*do_coverage*/ 0) + coverage_quad( qs, quads[i] ); + + quads[pass++] = quads[i]; } if (pass) diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c deleted file mode 100644 index 4adeb16546..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ /dev/null @@ -1,87 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * \brief Quad occlusion counter stage - * \author Brian Paul - */ - - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_quad_pipe.h" - -static unsigned count_bits( unsigned val ) -{ - unsigned i; - - for (i = 0; val ; val >>= 1) - i += (val & 1); - - return i; -} - -static void -occlusion_count_quads(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) -{ - struct softpipe_context *softpipe = qs->softpipe; - unsigned i; - - for (i = 0; i < nr; i++) - softpipe->occlusion_count += count_bits(quads[i]->inout.mask); - - qs->next->run(qs->next, quads, nr); -} - - -static void occlusion_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void occlusion_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = occlusion_begin; - stage->run = occlusion_count_quads; - stage->destroy = occlusion_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index d138d417ac..1b5bab4eca 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -38,18 +38,6 @@ sp_push_quad_first( struct softpipe_context *sp, sp->quad.first = quad; } -static void -sp_build_depth_stencil( struct softpipe_context *sp ) -{ - if (sp->depth_stencil->stencil[0].enabled || - sp->depth_stencil->stencil[1].enabled) { - sp_push_quad_first( sp, sp->quad.stencil_test ); - } - else if (sp->depth_stencil->depth.enabled && - sp->framebuffer.zsbuf) { - sp_push_quad_first( sp, sp->quad.depth_test ); - } -} void sp_build_quad_pipeline(struct softpipe_context *sp) @@ -61,37 +49,15 @@ sp_build_quad_pipeline(struct softpipe_context *sp) !sp->fs->info.uses_kill && !sp->fs->info.writes_z; - /* build up the pipeline in reverse order... */ - - /* Color combine - */ sp->quad.first = sp->quad.blend; - /* Shade/Depth/Stencil/Alpha - */ - if ((sp->rasterizer->poly_smooth && sp->reduced_prim == PIPE_PRIM_TRIANGLES) || - (sp->rasterizer->line_smooth && sp->reduced_prim == PIPE_PRIM_LINES) || - (sp->rasterizer->point_smooth && sp->reduced_prim == PIPE_PRIM_POINTS)) { - sp_push_quad_first( sp, sp->quad.coverage ); - } - - if (sp->active_query_count) { - sp_push_quad_first( sp, sp->quad.occlusion ); - } - - if (!early_depth_test) { - sp_build_depth_stencil( sp ); - } - - if (sp->depth_stencil->alpha.enabled) { - sp_push_quad_first( sp, sp->quad.alpha_test ); - } - - sp_push_quad_first( sp, sp->quad.shade ); - if (early_depth_test) { - sp_build_depth_stencil( sp ); - sp_push_quad_first( sp, sp->quad.earlyz ); + sp_push_quad_first( sp, sp->quad.shade ); + sp_push_quad_first( sp, sp->quad.depth_test ); + } + else { + sp_push_quad_first( sp, sp->quad.depth_test ); + sp_push_quad_first( sp, sp->quad.shade ); } } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.h b/src/gallium/drivers/softpipe/sp_quad_pipe.h index add31ba705..c0aa134831 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.h +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.h @@ -69,6 +69,4 @@ struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); void sp_build_quad_pipeline(struct softpipe_context *sp); -boolean sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); - #endif /* SP_QUAD_PIPE_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c deleted file mode 100644 index d9ee80e59a..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ /dev/null @@ -1,363 +0,0 @@ - -/** - * \brief Quad stencil testing - */ - - -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_tile_cache.h" -#include "sp_quad_pipe.h" -#include "pipe/p_defines.h" -#include "util/u_memory.h" - - -/** Only 8-bit stencil supported */ -#define STENCIL_MAX 0xff - - -/** - * Do the basic stencil test (compare stencil buffer values against the - * reference value. - * - * \param stencilVals the stencil values from the stencil buffer - * \param func the stencil func (PIPE_FUNC_x) - * \param ref the stencil reference value - * \param valMask the stencil value mask indicating which bits of the stencil - * values and ref value are to be used. - * \return mask indicating which pixels passed the stencil test - */ -static unsigned -do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func, - unsigned ref, unsigned valMask) -{ - unsigned passMask = 0x0; - unsigned j; - - ref &= valMask; - - switch (func) { - case PIPE_FUNC_NEVER: - /* passMask = 0x0 */ - break; - case PIPE_FUNC_LESS: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref < (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_EQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref == (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_LEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref <= (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_GREATER: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref > (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_NOTEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref != (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_GEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref >= (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_ALWAYS: - passMask = MASK_ALL; - break; - default: - assert(0); - } - - return passMask; -} - - -/** - * Apply the stencil operator to stencil values. - * - * \param stencilVals the stencil buffer values (read and written) - * \param mask indicates which pixels to update - * \param op the stencil operator (PIPE_STENCIL_OP_x) - * \param ref the stencil reference value - * \param wrtMask writemask controlling which bits are changed in the - * stencil values - */ -static void -apply_stencil_op(ubyte stencilVals[QUAD_SIZE], - unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) -{ - unsigned j; - ubyte newstencil[QUAD_SIZE]; - - for (j = 0; j < QUAD_SIZE; j++) { - newstencil[j] = stencilVals[j]; - } - - switch (op) { - case PIPE_STENCIL_OP_KEEP: - /* no-op */ - break; - case PIPE_STENCIL_OP_ZERO: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = 0; - } - } - break; - case PIPE_STENCIL_OP_REPLACE: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = ref; - } - } - break; - case PIPE_STENCIL_OP_INCR: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - if (stencilVals[j] < STENCIL_MAX) { - newstencil[j] = stencilVals[j] + 1; - } - } - } - break; - case PIPE_STENCIL_OP_DECR: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - if (stencilVals[j] > 0) { - newstencil[j] = stencilVals[j] - 1; - } - } - } - break; - case PIPE_STENCIL_OP_INCR_WRAP: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = stencilVals[j] + 1; - } - } - break; - case PIPE_STENCIL_OP_DECR_WRAP: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = stencilVals[j] - 1; - } - } - break; - case PIPE_STENCIL_OP_INVERT: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = ~stencilVals[j]; - } - } - break; - default: - assert(0); - } - - /* - * update the stencil values - */ - if (wrtMask != STENCIL_MAX) { - /* apply bit-wise stencil buffer writemask */ - for (j = 0; j < QUAD_SIZE; j++) { - stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]); - } - } - else { - for (j = 0; j < QUAD_SIZE; j++) { - stencilVals[j] = newstencil[j]; - } - } -} - - -/** - * Do stencil (and depth) testing. Stenciling depends on the outcome of - * depth testing. - */ -static void -stencil_test_quad(struct quad_stage *qs, struct quad_header *quads[], - unsigned nr) -{ - struct softpipe_context *softpipe = qs->softpipe; - struct pipe_surface *ps = softpipe->framebuffer.zsbuf; - unsigned func, zFailOp, zPassOp, failOp; - ubyte ref, wrtMask, valMask; - ubyte stencilVals[QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe->zsbuf_cache, - quads[0]->input.x0, - quads[0]->input.y0); - uint face = quads[0]->input.facing; - uint pass = 0; - uint j, q; - - if (!softpipe->depth_stencil->stencil[1].enabled) { - /* single-sided stencil test, use front (face=0) state */ - face = 0; - } - - /* choose front or back face function, operator, etc */ - /* XXX we could do these initializations once per primitive */ - func = softpipe->depth_stencil->stencil[face].func; - failOp = softpipe->depth_stencil->stencil[face].fail_op; - zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; - zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; - ref = softpipe->depth_stencil->stencil[face].ref_value; - wrtMask = softpipe->depth_stencil->stencil[face].writemask; - valMask = softpipe->depth_stencil->stencil[face].valuemask; - - assert(ps); /* shouldn't get here if there's no stencil buffer */ - - for (q = 0; q < nr; q++) { - struct quad_header *quad = quads[q]; - - /* get stencil values from cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] >> 24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] & 0xff; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.stencil8[y][x]; - } - break; - default: - assert(0); - } - - /* do the stencil test first */ - { - unsigned passMask, failMask; - passMask = do_stencil_test(stencilVals, func, ref, valMask); - failMask = quad->inout.mask & ~passMask; - quad->inout.mask &= passMask; - - if (failOp != PIPE_STENCIL_OP_KEEP) { - apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); - } - } - - if (quad->inout.mask) { - /* now the pixels that passed the stencil test are depth tested */ - if (softpipe->depth_stencil->depth.enabled) { - const unsigned origMask = quad->inout.mask; - - sp_depth_test_quad(qs, quad); /* quad->mask is updated */ - - /* update stencil buffer values according to z pass/fail result */ - if (zFailOp != PIPE_STENCIL_OP_KEEP) { - const unsigned failMask = origMask & ~quad->inout.mask; - apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); - } - - if (zPassOp != PIPE_STENCIL_OP_KEEP) { - const unsigned passMask = origMask & quad->inout.mask; - apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); - } - } - else { - /* no depth test, apply Zpass operator to stencil buffer values */ - apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); - } - - } - - /* put new stencil values into cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint s8z24 = tile->data.depth32[y][x]; - s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); - tile->data.depth32[y][x] = s8z24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; - tile->data.depth32[y][x] = z24s8; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.stencil8[y][x] = stencilVals[j]; - } - break; - default: - assert(0); - } - - if (quad->inout.mask) - quads[pass++] = quad; - } - - if (pass) - qs->next->run(qs->next, quads, pass); -} - - -static void stencil_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void stencil_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = stencil_begin; - stage->run = stencil_test_quad; - stage->destroy = stencil_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 0226501267..8654069bde 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -246,9 +246,7 @@ void softpipe_update_derived( struct softpipe_context *softpipe ) if (softpipe->dirty & (SP_NEW_BLEND | SP_NEW_DEPTH_STENCIL_ALPHA | SP_NEW_FRAMEBUFFER | - SP_NEW_RASTERIZER | - SP_NEW_FS | - SP_NEW_QUERY)) + SP_NEW_FS)) sp_build_quad_pipeline(softpipe); softpipe->dirty = 0; -- cgit v1.2.3 From c61145820556833dccd728eb6df3397bec7f70da Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 27 Jul 2009 12:11:16 +0100 Subject: softpipe: fastpath for interpolated z16 less depthtesting Because this is interpolated (ie. early) depth, we can build in an assumption about the quads emitted by triangle setup, ie that they are actually linear spans. Interpolate z over those spans in z16 format to save on math & conversion. --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 143 +++++++++++++++++++++- 1 file changed, 139 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad_depth_test.c') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index bf65799a81..506867f4d0 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -646,9 +646,9 @@ static unsigned mask_count[0x8] = static void -depth_test_quads(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +depth_test_quads_fallback(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { unsigned i, pass = 0; const struct sp_fragment_shader *fs = qs->softpipe->fs; @@ -704,9 +704,144 @@ depth_test_quads(struct quad_stage *qs, qs->next->run(qs->next, quads, nr); } +/* XXX: this function assumes setup function actually emits linear + * spans of quads. It seems a lot more natural to do (early) + * depth-testing on spans rather than quads. + */ +static void +depth_interp_z16_less_write(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + unsigned i, pass = 0; + const unsigned ix = quads[0]->input.x0; + const unsigned iy = quads[0]->input.y0; + const float fx = (float) ix; + const float fy = (float) iy; + const float dzdx = quads[0]->posCoef->dadx[2]; + const float dzdy = quads[0]->posCoef->dady[2]; + const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; + struct softpipe_cached_tile *tile; + ushort (*depth16)[TILE_SIZE]; + ushort idepth[4], depth_step; + const float scale = 65535.0; + + idepth[0] = (ushort)((z0) * scale); + idepth[1] = (ushort)((z0 + dzdx) * scale); + idepth[2] = (ushort)((z0 + dzdy) * scale); + idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); + + depth_step = (ushort)(dzdx * 2 * scale); + + tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); + + depth16 = (ushort (*)[TILE_SIZE]) + &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; + + for (i = 0; i < nr; i++) { + unsigned outmask = quads[i]->inout.mask; + unsigned mask = 0; + + if ((outmask & 1) && idepth[0] < depth16[0][0]) { + depth16[0][0] = idepth[0]; + mask |= (1 << 0); + } + + if ((outmask & 2) && idepth[1] < depth16[0][1]) { + depth16[0][1] = idepth[1]; + mask |= (1 << 1); + } + + if ((outmask & 4) && idepth[2] < depth16[1][0]) { + depth16[1][0] = idepth[2]; + mask |= (1 << 2); + } + + if ((outmask & 8) && idepth[3] < depth16[1][1]) { + depth16[1][1] = idepth[3]; + mask |= (1 << 3); + } + + idepth[0] += depth_step; + idepth[1] += depth_step; + idepth[2] += depth_step; + idepth[3] += depth_step; + + depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; + + quads[i]->inout.mask = mask; + if (quads[i]->inout.mask) + quads[pass++] = quads[i]; + } + + if (pass) + qs->next->run(qs->next, quads, pass); + +} + + +static void +depth_noop(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + qs->next->run(qs->next, quads, nr); +} + + + +static void +choose_depth_test(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + boolean interp_depth = !qs->softpipe->fs->info.writes_z; + + boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; + + boolean depth = (qs->softpipe->framebuffer.zsbuf && + qs->softpipe->depth_stencil->depth.enabled); + + unsigned depthfunc = qs->softpipe->depth_stencil->depth.func; + + boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled; + + boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask; + + + qs->run = depth_test_quads_fallback; + + if (!alpha && + !depth && + !stencil) { + qs->run = depth_noop; + } + else if (!alpha && + interp_depth && + depth && + depthfunc == PIPE_FUNC_LESS && + depthwrite && + !stencil) + { + switch (qs->softpipe->framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + qs->run = depth_interp_z16_less_write; + break; + default: + break; + } + } + + qs->run( qs, quads, nr ); +} + + + + static void depth_test_begin(struct quad_stage *qs) { + qs->run = choose_depth_test; qs->next->begin(qs->next); } @@ -723,7 +858,7 @@ struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = depth_test_begin; - stage->run = depth_test_quads; + stage->run = choose_depth_test; stage->destroy = depth_test_destroy; return stage; -- cgit v1.2.3 From 73a6178a73a4cc34195348a537d3f94aab6a43e1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 30 Jul 2009 11:35:08 +0100 Subject: softpipe: add depth-lequal z16 path --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 107 ++++++++++++++++++++-- 1 file changed, 100 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad_depth_test.c') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 506867f4d0..9cffea2c9e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -780,6 +780,81 @@ depth_interp_z16_less_write(struct quad_stage *qs, } +static void +depth_interp_z16_lequal_write(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + unsigned i, pass = 0; + const unsigned ix = quads[0]->input.x0; + const unsigned iy = quads[0]->input.y0; + const float fx = (float) ix; + const float fy = (float) iy; + const float dzdx = quads[0]->posCoef->dadx[2]; + const float dzdy = quads[0]->posCoef->dady[2]; + const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; + struct softpipe_cached_tile *tile; + ushort (*depth16)[TILE_SIZE]; + ushort idepth[4], depth_step; + const float scale = 65535.0; + + idepth[0] = (ushort)((z0) * scale); + idepth[1] = (ushort)((z0 + dzdx) * scale); + idepth[2] = (ushort)((z0 + dzdy) * scale); + idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); + + depth_step = (ushort)(dzdx * 2 * scale); + + tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); + + depth16 = (ushort (*)[TILE_SIZE]) + &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; + + for (i = 0; i < nr; i++) { + unsigned outmask = quads[i]->inout.mask; + unsigned mask = 0; + + if ((outmask & 1) && idepth[0] <= depth16[0][0]) { + depth16[0][0] = idepth[0]; + mask |= (1 << 0); + } + + if ((outmask & 2) && idepth[1] <= depth16[0][1]) { + depth16[0][1] = idepth[1]; + mask |= (1 << 1); + } + + if ((outmask & 4) && idepth[2] <= depth16[1][0]) { + depth16[1][0] = idepth[2]; + mask |= (1 << 2); + } + + if ((outmask & 8) && idepth[3] <= depth16[1][1]) { + depth16[1][1] = idepth[3]; + mask |= (1 << 3); + } + + idepth[0] += depth_step; + idepth[1] += depth_step; + idepth[2] += depth_step; + idepth[3] += depth_step; + + depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; + + quads[i]->inout.mask = mask; + if (quads[i]->inout.mask) + quads[pass++] = quads[i]; + } + + if (pass) + qs->next->run(qs->next, quads, pass); + +} + + + + + static void depth_noop(struct quad_stage *qs, struct quad_header *quads[], @@ -809,8 +884,6 @@ choose_depth_test(struct quad_stage *qs, boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask; - qs->run = depth_test_quads_fallback; - if (!alpha && !depth && !stencil) { @@ -819,18 +892,38 @@ choose_depth_test(struct quad_stage *qs, else if (!alpha && interp_depth && depth && - depthfunc == PIPE_FUNC_LESS && depthwrite && !stencil) { - switch (qs->softpipe->framebuffer.zsbuf->format) { - case PIPE_FORMAT_Z16_UNORM: - qs->run = depth_interp_z16_less_write; + switch (depthfunc) { + case PIPE_FUNC_LESS: + switch (qs->softpipe->framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + qs->run = depth_interp_z16_less_write; + break; + default: + qs->run = depth_test_quads_fallback; + break; + } break; - default: + case PIPE_FUNC_LEQUAL: + switch (qs->softpipe->framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + qs->run = depth_interp_z16_lequal_write; + break; + default: + qs->run = depth_test_quads_fallback; + break; + } break; + default: + qs->run = depth_test_quads_fallback; } } + else { + qs->run = depth_test_quads_fallback; + } + qs->run( qs, quads, nr ); } -- cgit v1.2.3 From b626176f0613852df908b4b0552b9b67d5830b4e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 22 Sep 2009 19:26:08 +0100 Subject: softpipe: fix occlusion counting --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad_depth_test.c') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 9cffea2c9e..ce1bab9341 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -631,7 +631,7 @@ alpha_test_quads(struct quad_stage *qs, } } -static unsigned mask_count[0x8] = +static unsigned mask_count[16] = { 0, /* 0x0 */ 1, /* 0x1 */ @@ -641,6 +641,14 @@ static unsigned mask_count[0x8] = 2, /* 0x5 */ 2, /* 0x6 */ 3, /* 0x7 */ + 1, /* 0x8 */ + 2, /* 0x9 */ + 2, /* 0xa */ + 3, /* 0xb */ + 2, /* 0xc */ + 3, /* 0xd */ + 3, /* 0xe */ + 4, /* 0xf */ }; @@ -693,13 +701,17 @@ depth_test_quads_fallback(struct quad_stage *qs, qs->softpipe->depth_stencil->depth.writemask) write_depth_stencil_values(&data, quads[i]); - qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask]; quads[pass++] = quads[i]; } nr = pass; } + if (qs->softpipe->active_query_count) { + for (i = 0; i < nr; i++) + qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask]; + } + if (nr) qs->next->run(qs->next, quads, nr); } @@ -883,6 +895,8 @@ choose_depth_test(struct quad_stage *qs, boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask; + boolean occlusion = qs->softpipe->active_query_count; + if (!alpha && !depth && @@ -893,6 +907,7 @@ choose_depth_test(struct quad_stage *qs, interp_depth && depth && depthwrite && + !occlusion && !stencil) { switch (depthfunc) { -- cgit v1.2.3 From fe9ca0f718cbc467e5cee99a2c20a5f257ed2fe1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 22 Sep 2009 20:47:37 +0100 Subject: softpipe: need to write depth/stencil values even when stencil fails --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad_depth_test.c') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index ce1bab9341..0ca86c4e1c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -498,7 +498,7 @@ depth_test_quad(struct quad_stage *qs, * Do stencil (and depth) testing. Stenciling depends on the outcome of * depth testing. */ -static boolean +static void depth_stencil_test_quad(struct quad_stage *qs, struct depth_data *data, struct quad_header *quad) @@ -545,13 +545,13 @@ depth_stencil_test_quad(struct quad_stage *qs, /* update stencil buffer values according to z pass/fail result */ if (zFailOp != PIPE_STENCIL_OP_KEEP) { - const unsigned failMask = origMask & ~quad->inout.mask; - apply_stencil_op(data, failMask, zFailOp, ref, wrtMask); + const unsigned zFailMask = origMask & ~quad->inout.mask; + apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask); } if (zPassOp != PIPE_STENCIL_OP_KEEP) { - const unsigned passMask = origMask & quad->inout.mask; - apply_stencil_op(data, passMask, zPassOp, ref, wrtMask); + const unsigned zPassMask = origMask & quad->inout.mask; + apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask); } } else { @@ -559,8 +559,6 @@ depth_stencil_test_quad(struct quad_stage *qs, apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask); } } - - return quad->inout.mask != 0; } @@ -689,17 +687,17 @@ depth_test_quads_fallback(struct quad_stage *qs, } if (qs->softpipe->depth_stencil->stencil[0].enabled) { - if (!depth_stencil_test_quad(qs, &data, quads[i])) - continue; + depth_stencil_test_quad(qs, &data, quads[i]); + write_depth_stencil_values(&data, quads[i]); } else { if (!depth_test_quad(qs, &data, quads[i])) continue; + + if (qs->softpipe->depth_stencil->depth.writemask) + write_depth_stencil_values(&data, quads[i]); } - if (qs->softpipe->depth_stencil->stencil[0].enabled || - qs->softpipe->depth_stencil->depth.writemask) - write_depth_stencil_values(&data, quads[i]); quads[pass++] = quads[i]; } -- cgit v1.2.3