summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKeith Whitwell <keithw@vmware.com>2010-10-09 11:28:00 +0100
committerKeith Whitwell <keithw@vmware.com>2010-10-09 11:44:45 +0100
commit5b7eb868fde98388d80601d8dea39e679828f42f (patch)
treec97e8a511ec7cf3bb6df50098b8cb22d2b3e5277 /src
parentaa4cb5e2d8d48c7dcc9653c61a9e25494e3e7b2a (diff)
llvmpipe: clean up shader pre/postamble, try to catch more early-z
Specifically, can do early-depth-test even when alpahtest or kill-pixel are active, providing we defer the actual z write until the final mask is avaialable. Improves demos/fire.c especially in the case where you get close to the trees.
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c40
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.h15
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c241
3 files changed, 193 insertions, 103 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 6b8ffb6ca2..8d9be2ebbb 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -410,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
* \param maskvalue is the depth test mask.
* \param counter is a pointer of the uint32 counter.
*/
-static void
+void
lp_build_occlusion_count(LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef maskvalue,
@@ -462,7 +462,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
LLVMValueRef z_src,
LLVMValueRef zs_dst_ptr,
LLVMValueRef face,
- LLVMValueRef counter,
+ LLVMValueRef *zs_value,
boolean do_branch)
{
struct lp_type type;
@@ -524,17 +524,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
* storage.
*/
if (depth->writemask) {
- type.sign = 0;
+ type.sign = 1;
lp_build_context_init(&bld, builder, type);
z_dst = lp_build_select(&bld, mask->value, z_src, z_dst);
z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst");
- LLVMBuildStore(builder, z_dst, zs_dst_ptr);
+ *zs_value = z_dst;
}
- if (counter)
- lp_build_occlusion_count(builder, type, mask->value, counter);
-
return;
}
@@ -779,7 +776,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
else
zs_dst = stencil_vals;
- LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
+ *zs_value = zs_dst;
}
if (s_pass_mask)
@@ -791,6 +788,29 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
if (do_branch)
lp_build_mask_check(mask);
- if (counter)
- lp_build_occlusion_count(builder, type, mask->value, counter);
+}
+
+
+
+void
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value)
+{
+ struct lp_type type;
+ struct lp_build_context bld;
+ LLVMValueRef z_dst;
+
+ /* XXX: pointlessly redo type logic:
+ */
+ type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
+ lp_build_context_init(&bld, builder, type);
+
+ z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
+ z_dst = lp_build_select(&bld, mask->value, zs_value, z_dst);
+
+ LLVMBuildStore(builder, z_dst, zs_dst_ptr);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 2a63bb9378..0f89668123 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -61,8 +61,21 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
LLVMValueRef zs_src,
LLVMValueRef zs_dst_ptr,
LLVMValueRef facing,
- LLVMValueRef counter,
+ LLVMValueRef *zs_value,
boolean do_branch);
+void
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value);
+
+void
+lp_build_occlusion_count(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef maskvalue,
+ LLVMValueRef counter);
#endif /* !LP_BLD_DEPTH_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index df5dd83c87..f45f36f633 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -104,43 +104,6 @@
static unsigned fs_no = 0;
-/**
- * Generate the depth /stencil test code.
- */
-static void
-generate_depth_stencil(LLVMBuilderRef builder,
- const struct lp_fragment_shader_variant_key *key,
- struct lp_type src_type,
- struct lp_build_mask_context *mask,
- LLVMValueRef stencil_refs[2],
- LLVMValueRef src,
- LLVMValueRef dst_ptr,
- LLVMValueRef facing,
- LLVMValueRef counter,
- boolean do_branch)
-{
- const struct util_format_description *format_desc;
-
- if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled)
- return;
-
- format_desc = util_format_description(key->zsbuf_format);
- assert(format_desc);
-
- lp_build_depth_stencil_test(builder,
- &key->depth,
- key->stencil,
- src_type,
- format_desc,
- mask,
- stencil_refs,
- src,
- dst_ptr,
- facing,
- counter,
- do_branch);
-}
-
/**
* Expand the relevent bits of mask_input to a 4-dword mask for the
@@ -222,6 +185,26 @@ generate_quad_mask(LLVMBuilderRef builder,
}
+#define EARLY_DEPTH_TEST 0x1
+#define LATE_DEPTH_TEST 0x2
+#define EARLY_DEPTH_WRITE 0x4
+#define LATE_DEPTH_WRITE 0x8
+
+static int
+find_output_by_semantic( const struct tgsi_shader_info *info,
+ unsigned semantic,
+ unsigned index )
+{
+ int i;
+
+ for (i = 0; i < info->num_outputs; i++)
+ if (info->output_semantic_name[i] == semantic &&
+ info->output_semantic_index[i] == index)
+ return i;
+
+ return -1;
+}
+
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
@@ -246,21 +229,53 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef mask_input,
LLVMValueRef counter)
{
+ const struct util_format_description *zs_format_desc = NULL;
const struct tgsi_token *tokens = shader->base.tokens;
LLVMTypeRef vec_type;
LLVMValueRef consts_ptr;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
LLVMValueRef z;
+ LLVMValueRef zs_value = NULL;
LLVMValueRef stencil_refs[2];
struct lp_build_flow_context *flow;
struct lp_build_mask_context mask;
- boolean early_depth_stencil_test;
boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 &&
shader->info.num_inputs < 3 &&
shader->info.num_instructions < 8);
unsigned attrib;
unsigned chan;
unsigned cbuf;
+ unsigned depth_mode;
+
+ if (key->depth.enabled ||
+ key->stencil[0].enabled ||
+ key->stencil[1].enabled) {
+
+ zs_format_desc = util_format_description(key->zsbuf_format);
+ assert(zs_format_desc);
+
+ if (!shader->info.writes_z) {
+ if (key->alpha.enabled || shader->info.uses_kill)
+ /* With alpha test and kill, can do the depth test early
+ * and hopefully eliminate some quads. But need to do a
+ * special deferred depth write once the final mask value
+ * is known.
+ */
+ depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+ else
+ depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+ }
+ else {
+ depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+ }
+
+ if (!(key->depth.enabled && key->depth.writemask) &&
+ !(key->stencil[0].enabled && key->stencil[0].writemask))
+ depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
+ }
+ else {
+ depth_mode = 0;
+ }
assert(i < 4);
@@ -293,79 +308,121 @@ generate_fs(struct llvmpipe_context *lp,
*pmask = lp_build_const_int_vec(type, ~0);
}
-
- early_depth_stencil_test =
- (key->depth.enabled || key->stencil[0].enabled) &&
- !key->alpha.enabled &&
- !shader->info.uses_kill &&
- !shader->info.writes_z;
-
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
- if (!early_depth_stencil_test && !simple_shader)
+ if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
lp_build_mask_check(&mask);
lp_build_interp_soa_update_pos(interp, i);
z = interp->pos[2];
- if (early_depth_stencil_test)
- generate_depth_stencil(builder, key,
- type, &mask,
- stencil_refs,
- z, depth_ptr,
- facing, counter,
- !simple_shader);
+ if (depth_mode & EARLY_DEPTH_TEST) {
+ lp_build_depth_stencil_test(builder,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z,
+ depth_ptr, facing,
+ &zs_value,
+ !simple_shader);
+
+ if (depth_mode & EARLY_DEPTH_WRITE)
+ LLVMBuildStore(builder, zs_value, depth_ptr);
+ }
lp_build_interp_soa_update_inputs(interp, i);
-
+
+ /* Build the actual shader */
lp_build_tgsi_soa(builder, tokens, type, &mask,
consts_ptr, interp->pos, interp->inputs,
outputs, sampler, &shader->info);
- /* loop over fragment shader outputs/results */
- for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
- for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- if(outputs[attrib][chan]) {
+
+ /* Alpha test */
+ if (key->alpha.enabled) {
+ int color0 = find_output_by_semantic(&shader->info,
+ TGSI_SEMANTIC_COLOR,
+ 0);
+
+ if (color0 != -1) {
+ LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
+ LLVMValueRef alpha_ref_value;
+
+ alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
+ alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
+
+ lp_build_alpha_test(builder, key->alpha.func, type,
+ &mask, alpha, alpha_ref_value,
+ (depth_mode & LATE_DEPTH_TEST) != 0);
+ }
+ }
+
+ /* Late Z test */
+ if (depth_mode & LATE_DEPTH_TEST) {
+ int pos0 = find_output_by_semantic(&shader->info,
+ TGSI_SEMANTIC_POSITION,
+ 0);
+
+ if (pos0 != -1) {
+ z = LLVMBuildLoad(builder, outputs[pos0][2], "z");
+ lp_build_name(z, "output%u.%u.%c", i, pos0, "xyzw"[chan]);
+ }
+
+ lp_build_depth_stencil_test(builder,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z,
+ depth_ptr, facing,
+ &zs_value,
+ !simple_shader);
+ /* Late Z write */
+ if (depth_mode & LATE_DEPTH_WRITE)
+ LLVMBuildStore(builder, zs_value, depth_ptr);
+ }
+ else if ((depth_mode & EARLY_DEPTH_TEST) &&
+ (depth_mode & LATE_DEPTH_WRITE))
+ {
+ /* Need to apply a reduced mask to the depth write. Reload the
+ * depth value, update from zs_value with the new mask value and
+ * write that out.
+ */
+ lp_build_deferred_depth_write(builder,
+ type,
+ zs_format_desc,
+ &mask,
+ depth_ptr,
+ zs_value);
+ }
+
+
+ /* Color write */
+ for (attrib = 0; attrib < shader->info.num_outputs; ++attrib)
+ {
+ if (shader->info.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR)
+ {
+ unsigned cbuf = shader->info.output_semantic_index[attrib];
+ for(chan = 0; chan < NUM_CHANNELS; ++chan)
+ {
+ /* XXX: just initialize outputs to point at colors[] and
+ * skip this.
+ */
LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
- lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]);
-
- switch (shader->info.output_semantic_name[attrib]) {
- case TGSI_SEMANTIC_COLOR:
- {
- unsigned cbuf = shader->info.output_semantic_index[attrib];
-
- lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
-
- /* Alpha test */
- /* XXX: should only test the final assignment to alpha */
- if (cbuf == 0 && chan == 3 && key->alpha.enabled) {
- LLVMValueRef alpha = out;
- LLVMValueRef alpha_ref_value;
- alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
- alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
- lp_build_alpha_test(builder, key->alpha.func, type,
- &mask, alpha, alpha_ref_value, FALSE);
- }
-
- LLVMBuildStore(builder, out, color[cbuf][chan]);
- break;
- }
-
- case TGSI_SEMANTIC_POSITION:
- if(chan == 2)
- z = out;
- break;
- }
+ lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
+ LLVMBuildStore(builder, out, color[cbuf][chan]);
}
}
}
- if (!early_depth_stencil_test)
- generate_depth_stencil(builder, key,
- type, &mask,
- stencil_refs, z, depth_ptr,
- facing, counter, FALSE);
+ if (counter)
+ lp_build_occlusion_count(builder, type, mask.value, counter);
lp_build_mask_end(&mask);