From 67b957781d8195b8f8867e994c03b68f8dc5c807 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 7 Sep 2010 07:55:28 +0100 Subject: llvmpipe: pass linear masks to fragment shader Fragment shader can extract the correct bits for each quad. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 66 -------------------------- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 6 +-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 24 ++++++++-- 3 files changed, 23 insertions(+), 73 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index dbaa8e023a..278375652b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -68,36 +68,6 @@ block_full_16(struct lp_rasterizer_task *task, } #if !defined(PIPE_ARCH_SSE) -static INLINE unsigned -build_mask(int c, int dcdx, int dcdy) -{ - int mask = 0; - - int c0 = c; - int c1 = c0 + dcdx; - int c2 = c1 + dcdx; - int c3 = c2 + dcdx; - - mask |= ((c0 + 0 * dcdy) >> 31) & (1 << 0); - mask |= ((c0 + 1 * dcdy) >> 31) & (1 << 2); - mask |= ((c0 + 2 * dcdy) >> 31) & (1 << 8); - mask |= ((c0 + 3 * dcdy) >> 31) & (1 << 10); - mask |= ((c1 + 0 * dcdy) >> 31) & (1 << 1); - mask |= ((c1 + 1 * dcdy) >> 31) & (1 << 3); - mask |= ((c1 + 2 * dcdy) >> 31) & (1 << 9); - mask |= ((c1 + 3 * dcdy) >> 31) & (1 << 11); - mask |= ((c2 + 0 * dcdy) >> 31) & (1 << 4); - mask |= ((c2 + 1 * dcdy) >> 31) & (1 << 6); - mask |= ((c2 + 2 * dcdy) >> 31) & (1 << 12); - mask |= ((c2 + 3 * dcdy) >> 31) & (1 << 14); - mask |= ((c3 + 0 * dcdy) >> 31) & (1 << 5); - mask |= ((c3 + 1 * dcdy) >> 31) & (1 << 7); - mask |= ((c3 + 2 * dcdy) >> 31) & (1 << 13); - mask |= ((c3 + 3 * dcdy) >> 31) & (1 << 15); - - return mask; -} - static INLINE unsigned build_mask_linear(int c, int dcdx, int dcdy) @@ -219,42 +189,6 @@ build_mask_linear(int c, int dcdx, int dcdy) return _mm_movemask_epi8(result); } -static INLINE unsigned -build_mask(int c, int dcdx, int dcdy) -{ - __m128i step = _mm_setr_epi32(0, dcdx, dcdy, dcdx + dcdy); - __m128i c0 = _mm_set1_epi32(c); - - /* Get values across the quad - */ - __m128i cstep0 = _mm_add_epi32(c0, step); - - /* Scale up step for moving between quads. - */ - __m128i step4 = _mm_add_epi32(step, step); - - /* Get values for the remaining quads: - */ - __m128i cstep1 = _mm_add_epi32(cstep0, - _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1))); - __m128i cstep2 = _mm_add_epi32(cstep0, - _mm_shuffle_epi32(step4, _MM_SHUFFLE(2,2,2,2))); - __m128i cstep3 = _mm_add_epi32(cstep2, - _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1))); - - /* pack pairs of results into epi16 - */ - __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); - __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); - - /* pack into epi8, preserving sign bits - */ - __m128i result = _mm_packs_epi16(cstep01, cstep23); - - /* extract sign bits to create mask - */ - return _mm_movemask_epi8(result); -} #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 52b9012036..a99c8ecfa7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -50,9 +50,9 @@ TAG(do_block_4)(struct lp_rasterizer_task *task, int j; for (j = 0; j < NR_PLANES; j++) { - mask &= ~build_mask(c[j] - 1, - -plane[j].dcdx, - plane[j].dcdy); + mask &= ~build_mask_linear(c[j] - 1, + -plane[j].dcdx, + plane[j].dcdy); } /* Now pass to the shader: diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 1bcc16dd6d..6053e825e1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -186,6 +186,7 @@ generate_quad_mask(LLVMBuilderRef builder, LLVMTypeRef i32t = LLVMInt32Type(); LLVMValueRef bits[4]; LLVMValueRef mask; + int shift; /* * XXX: We'll need a different path for 16 x u8 @@ -197,10 +198,25 @@ generate_quad_mask(LLVMBuilderRef builder, /* * mask_input >>= (quad * 4) */ + + switch (quad) { + case 0: + shift = 0; + break; + case 1: + shift = 2; + break; + case 2: + shift = 8; + break; + case 3: + shift = 10; + break; + } mask_input = LLVMBuildLShr(builder, mask_input, - LLVMConstInt(i32t, quad * 4, 0), + LLVMConstInt(i32t, shift, 0), ""); /* @@ -211,9 +227,9 @@ generate_quad_mask(LLVMBuilderRef builder, bits[0] = LLVMConstInt(i32t, 1 << 0, 0); bits[1] = LLVMConstInt(i32t, 1 << 1, 0); - bits[2] = LLVMConstInt(i32t, 1 << 2, 0); - bits[3] = LLVMConstInt(i32t, 1 << 3, 0); - + bits[2] = LLVMConstInt(i32t, 1 << 4, 0); + bits[3] = LLVMConstInt(i32t, 1 << 5, 0); + mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), ""); /* -- cgit v1.2.3