summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/softpipe/sp_quad_depth_test.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/softpipe/sp_quad_depth_test.c')
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_depth_test.c113
1 files changed, 65 insertions, 48 deletions
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index 499eebd671..4815a0d49f 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -73,8 +73,8 @@ get_depth_stencil_values( struct depth_data *data,
data->bzzzz[j] = tile->data.depth32[y][x];
}
break;
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
@@ -82,8 +82,8 @@ get_depth_stencil_values( struct depth_data *data,
data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
}
break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
@@ -146,8 +146,8 @@ convert_quad_depth( struct depth_data *data,
}
}
break;
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
{
float scale = (float) ((1 << 24) - 1);
@@ -156,8 +156,8 @@ convert_quad_depth( struct depth_data *data,
}
}
break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
{
float scale = (float) ((1 << 24) - 1);
@@ -189,7 +189,7 @@ write_depth_stencil_values( struct depth_data *data,
tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
}
break;
- case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z32_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
@@ -197,21 +197,21 @@ write_depth_stencil_values( struct depth_data *data,
tile->data.depth32[y][x] = data->bzzzz[j];
}
break;
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
}
break;
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
}
break;
- case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
@@ -728,9 +728,14 @@ depth_test_quads_fallback(struct quad_stage *qs,
qs->next->run(qs->next, quads, nr);
}
-/* XXX: this function assumes setup function actually emits linear
- * spans of quads. It seems a lot more natural to do (early)
- * depth-testing on spans rather than quads.
+
+/**
+ * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LESS and
+ * Z buffer writes enabled.
+ *
+ * NOTE: there's no guarantee that the quads are sequentially side by
+ * side. The fragment shader may have culled some quads, etc. Sliver
+ * triangles may generate non-sequential quads.
*/
static void
depth_interp_z16_less_write(struct quad_stage *qs,
@@ -747,25 +752,33 @@ depth_interp_z16_less_write(struct quad_stage *qs,
const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
struct softpipe_cached_tile *tile;
ushort (*depth16)[TILE_SIZE];
- ushort idepth[4], depth_step;
+ ushort init_idepth[4], idepth[4], depth_step;
const float scale = 65535.0;
- idepth[0] = (ushort)((z0) * scale);
- idepth[1] = (ushort)((z0 + dzdx) * scale);
- idepth[2] = (ushort)((z0 + dzdy) * scale);
- idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
+ /* compute scaled depth of the four pixels in first quad */
+ init_idepth[0] = (ushort)((z0) * scale);
+ init_idepth[1] = (ushort)((z0 + dzdx) * scale);
+ init_idepth[2] = (ushort)((z0 + dzdy) * scale);
+ init_idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
- depth_step = (ushort)(dzdx * 2 * scale);
+ depth_step = (ushort)(dzdx * scale);
tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
- depth16 = (ushort (*)[TILE_SIZE])
- &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
-
for (i = 0; i < nr; i++) {
- unsigned outmask = quads[i]->inout.mask;
+ const unsigned outmask = quads[i]->inout.mask;
+ const int dx = quads[i]->input.x0 - ix;
unsigned mask = 0;
-
+
+ /* compute depth for this quad */
+ idepth[0] = init_idepth[0] + dx * depth_step;
+ idepth[1] = init_idepth[1] + dx * depth_step;
+ idepth[2] = init_idepth[2] + dx * depth_step;
+ idepth[3] = init_idepth[3] + dx * depth_step;
+
+ depth16 = (ushort (*)[TILE_SIZE])
+ &tile->data.depth16[iy % TILE_SIZE][(ix + dx)% TILE_SIZE];
+
if ((outmask & 1) && idepth[0] < depth16[0][0]) {
depth16[0][0] = idepth[0];
mask |= (1 << 0);
@@ -786,13 +799,6 @@ depth_interp_z16_less_write(struct quad_stage *qs,
mask |= (1 << 3);
}
- idepth[0] += depth_step;
- idepth[1] += depth_step;
- idepth[2] += depth_step;
- idepth[3] += depth_step;
-
- depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
-
quads[i]->inout.mask = mask;
if (quads[i]->inout.mask)
quads[pass++] = quads[i];
@@ -804,6 +810,14 @@ depth_interp_z16_less_write(struct quad_stage *qs,
}
+/**
+ * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LEQUAL and
+ * Z buffer writes enabled.
+ *
+ * NOTE: there's no guarantee that the quads are sequentially side by
+ * side. The fragment shader may have culled some quads, etc. Sliver
+ * triangles may generate non-sequential quads.
+ */
static void
depth_interp_z16_lequal_write(struct quad_stage *qs,
struct quad_header *quads[],
@@ -819,25 +833,33 @@ depth_interp_z16_lequal_write(struct quad_stage *qs,
const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
struct softpipe_cached_tile *tile;
ushort (*depth16)[TILE_SIZE];
- ushort idepth[4], depth_step;
+ ushort init_idepth[4], idepth[4], depth_step;
const float scale = 65535.0;
- idepth[0] = (ushort)((z0) * scale);
- idepth[1] = (ushort)((z0 + dzdx) * scale);
- idepth[2] = (ushort)((z0 + dzdy) * scale);
- idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
+ /* compute scaled depth of the four pixels in first quad */
+ init_idepth[0] = (ushort)((z0) * scale);
+ init_idepth[1] = (ushort)((z0 + dzdx) * scale);
+ init_idepth[2] = (ushort)((z0 + dzdy) * scale);
+ init_idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
- depth_step = (ushort)(dzdx * 2 * scale);
+ depth_step = (ushort)(dzdx * scale);
tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
- depth16 = (ushort (*)[TILE_SIZE])
- &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
-
for (i = 0; i < nr; i++) {
- unsigned outmask = quads[i]->inout.mask;
+ const unsigned outmask = quads[i]->inout.mask;
+ const int dx = quads[i]->input.x0 - ix;
unsigned mask = 0;
+ /* compute depth for this quad */
+ idepth[0] = init_idepth[0] + dx * depth_step;
+ idepth[1] = init_idepth[1] + dx * depth_step;
+ idepth[2] = init_idepth[2] + dx * depth_step;
+ idepth[3] = init_idepth[3] + dx * depth_step;
+
+ depth16 = (ushort (*)[TILE_SIZE])
+ &tile->data.depth16[iy % TILE_SIZE][(ix + dx)% TILE_SIZE];
+
if ((outmask & 1) && idepth[0] <= depth16[0][0]) {
depth16[0][0] = idepth[0];
mask |= (1 << 0);
@@ -858,11 +880,6 @@ depth_interp_z16_lequal_write(struct quad_stage *qs,
mask |= (1 << 3);
}
- idepth[0] += depth_step;
- idepth[1] += depth_step;
- idepth[2] += depth_step;
- idepth[3] += depth_step;
-
depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
quads[i]->inout.mask = mask;