From 2d6b39f05edcd575b1e10d53f96a38bec848fa67 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 9 Aug 2009 23:10:19 +0100 Subject: llvmpipe: Use the generated SoA blending code. --- src/gallium/drivers/llvmpipe/lp_context.h | 5 +- src/gallium/drivers/llvmpipe/lp_quad.h | 2 +- src/gallium/drivers/llvmpipe/lp_quad_blend.c | 799 ++------------------------ src/gallium/drivers/llvmpipe/lp_screen.c | 35 +- src/gallium/drivers/llvmpipe/lp_screen.h | 13 +- src/gallium/drivers/llvmpipe/lp_state.h | 19 + src/gallium/drivers/llvmpipe/lp_state_blend.c | 132 ++++- 7 files changed, 225 insertions(+), 780 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 7b5da6ee91..72387301a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -46,13 +46,14 @@ struct llvmpipe_tile_cache; struct llvmpipe_tex_tile_cache; struct lp_fragment_shader; struct lp_vertex_shader; +struct lp_blend_state; struct llvmpipe_context { struct pipe_context pipe; /**< base class */ /** Constant state objects */ - const struct pipe_blend_state *blend; + struct lp_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; const struct pipe_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; @@ -60,7 +61,7 @@ struct llvmpipe_context { const struct lp_vertex_shader *vs; /** Other rendering state */ - struct pipe_blend_color blend_color; + float ALIGN16_ATTRIB blend_color[4][QUAD_SIZE]; struct pipe_clip_state clip; struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h index b25c35d327..96cd27de81 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ b/src/gallium/drivers/llvmpipe/lp_quad.h @@ -83,7 +83,7 @@ struct quad_header_inout struct quad_header_output { /** colors in SOA format (rrrr, gggg, 
bbbb, aaaa) */ - float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; float depth[QUAD_SIZE]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_quad_blend.c b/src/gallium/drivers/llvmpipe/lp_quad_blend.c index fe6d30d7c8..fa2e902b6e 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_quad_blend.c @@ -26,96 +26,24 @@ **************************************************************************/ /** - * quad blending - * \author Brian Paul + * Quad blending. + * + * @author Jose Fonseca + * @author Brian Paul */ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_debug_dump.h" #include "lp_context.h" +#include "lp_state.h" #include "lp_quad.h" #include "lp_surface.h" #include "lp_tile_cache.h" #include "lp_quad_pipe.h" -#define VEC4_COPY(DST, SRC) \ -do { \ - DST[0] = SRC[0]; \ - DST[1] = SRC[1]; \ - DST[2] = SRC[2]; \ - DST[3] = SRC[3]; \ -} while(0) - -#define VEC4_SCALAR(DST, SRC) \ -do { \ - DST[0] = SRC; \ - DST[1] = SRC; \ - DST[2] = SRC; \ - DST[3] = SRC; \ -} while(0) - -#define VEC4_ADD(R, A, B) \ -do { \ - R[0] = A[0] + B[0]; \ - R[1] = A[1] + B[1]; \ - R[2] = A[2] + B[2]; \ - R[3] = A[3] + B[3]; \ -} while (0) - -#define VEC4_SUB(R, A, B) \ -do { \ - R[0] = A[0] - B[0]; \ - R[1] = A[1] - B[1]; \ - R[2] = A[2] - B[2]; \ - R[3] = A[3] - B[3]; \ -} while (0) - -/** Add and limit result to ceiling of 1.0 */ -#define VEC4_ADD_SAT(R, A, B) \ -do { \ - R[0] = A[0] + B[0]; if (R[0] > 1.0f) R[0] = 1.0f; \ - R[1] = A[1] + B[1]; if (R[1] > 1.0f) R[1] = 1.0f; \ - R[2] = A[2] + B[2]; if (R[2] > 1.0f) R[2] = 1.0f; \ - R[3] = A[3] + B[3]; if (R[3] > 1.0f) R[3] = 1.0f; \ -} while (0) - -/** Subtract and limit result to floor of 0.0 */ -#define VEC4_SUB_SAT(R, A, B) \ -do { \ - R[0] = A[0] - B[0]; if (R[0] < 0.0f) R[0] = 0.0f; \ - R[1] = A[1] - B[1]; if (R[1] < 0.0f) R[1] = 0.0f; \ - R[2] = A[2] - B[2]; if 
(R[2] < 0.0f) R[2] = 0.0f; \ - R[3] = A[3] - B[3]; if (R[3] < 0.0f) R[3] = 0.0f; \ -} while (0) - -#define VEC4_MUL(R, A, B) \ -do { \ - R[0] = A[0] * B[0]; \ - R[1] = A[1] * B[1]; \ - R[2] = A[2] * B[2]; \ - R[3] = A[3] * B[3]; \ -} while (0) - -#define VEC4_MIN(R, A, B) \ -do { \ - R[0] = (A[0] < B[0]) ? A[0] : B[0]; \ - R[1] = (A[1] < B[1]) ? A[1] : B[1]; \ - R[2] = (A[2] < B[2]) ? A[2] : B[2]; \ - R[3] = (A[3] < B[3]) ? A[3] : B[3]; \ -} while (0) - -#define VEC4_MAX(R, A, B) \ -do { \ - R[0] = (A[0] > B[0]) ? A[0] : B[0]; \ - R[1] = (A[1] > B[1]) ? A[1] : B[1]; \ - R[2] = (A[2] > B[2]) ? A[2] : B[2]; \ - R[3] = (A[3] > B[3]) ? A[3] : B[3]; \ -} while (0) - - - static void logicop_quad(struct quad_stage *qs, float (*quadColor)[4], @@ -142,7 +70,7 @@ logicop_quad(struct quad_stage *qs, src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ } - switch (llvmpipe->blend->logicop_func) { + switch (llvmpipe->blend->base.logicop_func) { case PIPE_LOGICOP_CLEAR: for (j = 0; j < 4; j++) res4[j] = 0; @@ -220,488 +148,6 @@ logicop_quad(struct quad_stage *qs, } - -static void -blend_quad(struct quad_stage *qs, - float (*quadColor)[4], - float (*dest)[4]) -{ - static const float zero[4] = { 0, 0, 0, 0 }; - static const float one[4] = { 1, 1, 1, 1 }; - struct llvmpipe_context *llvmpipe = qs->llvmpipe; - float source[4][QUAD_SIZE]; - - /* - * Compute src/first term RGB - */ - switch (llvmpipe->blend->rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[0], quadColor[0]); /* R */ - VEC4_COPY(source[1], quadColor[1]); /* G */ - VEC4_COPY(source[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); 
/* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - { - const float *alpha = dest[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4], temp[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(temp, alpha, diff); - VEC4_MUL(source[0], quadColor[0], temp); /* R */ - VEC4_MUL(source[1], quadColor[1], temp); /* G */ - VEC4_MUL(source[2], quadColor[2], temp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], comp); /* R */ - VEC4_SCALAR(comp, llvmpipe->blend_color.color[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], comp); /* G */ - VEC4_SCALAR(comp, llvmpipe->blend_color.color[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float alpha[4]; - VEC4_SCALAR(alpha, llvmpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[0], zero); /* R */ - VEC4_COPY(source[1], zero); /* G */ - VEC4_COPY(source[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], 
inv_comp); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[0]); - VEC4_MUL(source[0], quadColor[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[1]); - VEC4_MUL(source[1], quadColor[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[2]); - VEC4_MUL(source[2], quadColor[2], inv_comp); - } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_alpha[4]; - VEC4_SCALAR(inv_alpha, 1.0f - llvmpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - assert(0); /* to 
do */ - break; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - assert(0); /* to do */ - break; - default: - assert(0); - } - - /* - * Compute src/first term A - */ - switch (llvmpipe->blend->alpha_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[3], quadColor[3], alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* multiply alpha by 1.0 */ - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */ - VEC4_MUL(source[3], quadColor[3], comp); /* A */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - /* A */ - VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]); - VEC4_MUL(source[3], quadColor[3], inv_comp); - } - break; - default: - assert(0); - } - - - /* - * Compute dest/second term RGB - */ - switch (llvmpipe->blend->rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave 
dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_SCALAR(comp, llvmpipe->blend_color.color[1]); /* G */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_SCALAR(comp, llvmpipe->blend_color.color[2]); /* B */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* XXX what are these? 
*/ - assert(0); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[0]); - VEC4_MUL(dest[0], dest[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[1]); - VEC4_MUL(dest[1], dest[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[2]); - VEC4_MUL(dest[2], dest[2], inv_comp); - } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]); - VEC4_MUL(dest[0], dest[0], inv_comp); - VEC4_MUL(dest[1], dest[1], inv_comp); - VEC4_MUL(dest[2], dest[2], inv_comp); - } - break; 
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - default: - assert(0); - } - - /* - * Compute dest/second term A - */ - switch (llvmpipe->blend->alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[3], dest[3], comp); /* A */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]); - VEC4_MUL(dest[3], dest[3], inv_comp); - } - break; - default: - assert(0); - } - - /* - * Combine RGB terms - */ - switch (llvmpipe->blend->rgb_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - 
VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ - break; - default: - assert(0); - } - - /* - * Combine A terms - */ - switch (llvmpipe->blend->alpha_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ - break; - default: - assert(0); - } -} - static void colormask_quad(struct quad_stage *qs, float (*quadColor)[4], @@ -710,40 +156,45 @@ colormask_quad(struct quad_stage *qs, struct llvmpipe_context *llvmpipe = qs->llvmpipe; /* R */ - if (!(llvmpipe->blend->colormask & PIPE_MASK_R)) + if (!(llvmpipe->blend->base.colormask & PIPE_MASK_R)) COPY_4V(quadColor[0], dest[0]); /* G */ - if (!(llvmpipe->blend->colormask & PIPE_MASK_G)) + if (!(llvmpipe->blend->base.colormask & PIPE_MASK_G)) COPY_4V(quadColor[1], dest[1]); /* B */ - if (!(llvmpipe->blend->colormask & 
PIPE_MASK_B)) + if (!(llvmpipe->blend->base.colormask & PIPE_MASK_B)) COPY_4V(quadColor[2], dest[2]); /* A */ - if (!(llvmpipe->blend->colormask & PIPE_MASK_A)) + if (!(llvmpipe->blend->base.colormask & PIPE_MASK_A)) COPY_4V(quadColor[3], dest[3]); } +static void blend_begin(struct quad_stage *qs) +{ +} + + static void -blend_fallback(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +blend_run(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { struct llvmpipe_context *llvmpipe = qs->llvmpipe; - const struct pipe_blend_state *blend = llvmpipe->blend; + struct lp_blend_state *blend = llvmpipe->blend; unsigned cbuf; + uint q, i, j; for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) { - float dest[4][QUAD_SIZE]; + float ALIGN16_ATTRIB dest[4][QUAD_SIZE]; struct llvmpipe_cached_tile *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf], quads[0]->input.x0, quads[0]->input.y0); - uint q, i, j; for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; @@ -762,14 +213,19 @@ blend_fallback(struct quad_stage *qs, } - if (blend->logicop_enable) { + if (blend->base.logicop_enable) { logicop_quad( qs, quadColor, dest ); } - else if (blend->blend_enable) { - blend_quad( qs, quadColor, dest ); + else { + assert(blend->jit_function); + assert((((uintptr_t)quadColor) & 0xf) == 0); + assert((((uintptr_t)dest) & 0xf) == 0); + assert((((uintptr_t)llvmpipe->blend_color) & 0xf) == 0); + if(blend->jit_function) + blend->jit_function( quadColor, dest, llvmpipe->blend_color, quadColor ); } - if (blend->colormask != 0xf) + if (blend->base.colormask != 0xf) colormask_quad( qs, quadColor, dest ); /* Output color values @@ -788,197 +244,6 @@ blend_fallback(struct quad_stage *qs, } -static void -blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - static const float one[4] = { 1, 1, 1, 1 }; - float one_minus_alpha[QUAD_SIZE]; - float dest[4][QUAD_SIZE]; - float 
source[4][QUAD_SIZE]; - uint i, j, q; - - struct llvmpipe_cached_tile *tile - = lp_get_cached_tile(qs->llvmpipe->cbuf_cache[0], - quads[0]->input.x0, - quads[0]->input.y0); - - for (q = 0; q < nr; q++) { - struct quad_header *quad = quads[q]; - float (*quadColor)[4] = quad->output.color[0]; - const float *alpha = quadColor[3]; - const int itx = (quad->input.x0 & (TILE_SIZE-1)); - const int ity = (quad->input.y0 & (TILE_SIZE-1)); - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; - } - } - - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - VEC4_MUL(source[3], quadColor[3], alpha); /* A */ - - VEC4_SUB(one_minus_alpha, one, alpha); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */ - - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ - - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; - } - } - } - } -} - -static void -blend_single_add_one_one(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - float dest[4][QUAD_SIZE]; - uint i, j, q; - - struct llvmpipe_cached_tile *tile - = lp_get_cached_tile(qs->llvmpipe->cbuf_cache[0], - quads[0]->input.x0, - quads[0]->input.y0); - - for (q = 0; q < nr; q++) { - struct quad_header *quad = quads[q]; - float (*quadColor)[4] = quad->output.color[0]; - 
const int itx = (quad->input.x0 & (TILE_SIZE-1)); - const int ity = (quad->input.y0 & (TILE_SIZE-1)); - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; - } - } - - VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */ - VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */ - - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; - } - } - } - } -} - - -static void -single_output_color(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - uint i, j, q; - - struct llvmpipe_cached_tile *tile - = lp_get_cached_tile(qs->llvmpipe->cbuf_cache[0], - quads[0]->input.x0, - quads[0]->input.y0); - - for (q = 0; q < nr; q++) { - struct quad_header *quad = quads[q]; - float (*quadColor)[4] = quad->output.color[0]; - const int itx = (quad->input.x0 & (TILE_SIZE-1)); - const int ity = (quad->input.y0 & (TILE_SIZE-1)); - - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; - } - } - } - } -} - -static void -blend_noop(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ -} - - -static void -choose_blend_quad(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - struct llvmpipe_context *llvmpipe = qs->llvmpipe; - const struct pipe_blend_state *blend = llvmpipe->blend; - - qs->run = blend_fallback; - - if (llvmpipe->framebuffer.nr_cbufs == 0) { - qs->run = blend_noop; - } - else if (!llvmpipe->blend->logicop_enable && 
- llvmpipe->blend->colormask == 0xf) - { - if (!blend->blend_enable) { - qs->run = single_output_color; - } - else if (blend->rgb_src_factor == blend->alpha_src_factor && - blend->rgb_dst_factor == blend->alpha_dst_factor && - blend->rgb_func == blend->alpha_func && - llvmpipe->framebuffer.nr_cbufs == 1) - { - if (blend->alpha_func == PIPE_BLEND_ADD) { - if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE && - blend->rgb_dst_factor == PIPE_BLENDFACTOR_ONE) { - qs->run = blend_single_add_one_one; - } - else if (blend->rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA && - blend->rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA) - qs->run = blend_single_add_src_alpha_inv_src_alpha; - - } - } - } - - qs->run(qs, quads, nr); -} - - -static void blend_begin(struct quad_stage *qs) -{ - qs->run = choose_blend_quad; -} - - static void blend_destroy(struct quad_stage *qs) { FREE( qs ); @@ -991,7 +256,7 @@ struct quad_stage *lp_quad_blend_stage( struct llvmpipe_context *llvmpipe ) stage->llvmpipe = llvmpipe; stage->begin = blend_begin; - stage->run = choose_blend_quad; + stage->run = blend_run; stage->destroy = blend_destroy; return stage; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index b71f15678e..2236711e9b 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -26,6 +26,8 @@ **************************************************************************/ +#include + #include "util/u_memory.h" #include "util/u_simple_screen.h" #include "pipe/internal/p_winsys_screen.h" @@ -149,9 +151,17 @@ llvmpipe_is_format_supported( struct pipe_screen *screen, static void -llvmpipe_destroy_screen( struct pipe_screen *screen ) +llvmpipe_destroy_screen( struct pipe_screen *_screen ) { - struct pipe_winsys *winsys = screen->winsys; + struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + + struct pipe_winsys *winsys = _screen->winsys; + + if(screen->engine) + 
LLVMDisposeExecutionEngine(screen->engine); + + if(screen->pass) + LLVMDisposePassManager(screen->pass); if(winsys->destroy) winsys->destroy(winsys); @@ -169,6 +179,7 @@ struct pipe_screen * llvmpipe_create_screen(struct pipe_winsys *winsys) { struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen); + char *error = NULL; if (!screen) return NULL; @@ -186,5 +197,25 @@ llvmpipe_create_screen(struct pipe_winsys *winsys) llvmpipe_init_screen_texture_funcs(&screen->base); u_simple_screen_init(&screen->base); + screen->module = LLVMModuleCreateWithName("llvmpipe"); + + screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module); + + if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { + fprintf(stderr, "%s\n", error); + LLVMDisposeMessage(error); + abort(); + } + + screen->pass = LLVMCreateFunctionPassManager(screen->provider); + LLVMAddTargetData(LLVMGetExecutionEngineTargetData(screen->engine), screen->pass); + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + LLVMAddConstantPropagationPass(screen->pass); + LLVMAddInstructionCombiningPass(screen->pass); + LLVMAddPromoteMemoryToRegisterPass(screen->pass); + LLVMAddGVNPass(screen->pass); + LLVMAddCFGSimplificationPass(screen->pass); + return &screen->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index 7386ebefe6..c3ff1531d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -31,14 +31,23 @@ #ifndef LP_SCREEN_H #define LP_SCREEN_H +#include <llvm-c/Core.h> +#include <llvm-c/Analysis.h> +#include <llvm-c/ExecutionEngine.h> + #include "pipe/p_screen.h" #include "pipe/p_defines.h" - -struct llvmpipe_screen { +struct llvmpipe_screen +{ struct pipe_screen base; + LLVMModuleRef module; + LLVMExecutionEngineRef engine; + LLVMModuleProviderRef provider; + LLVMPassManagerRef pass; + /* Increments whenever textures are modified. Contexts can track * this. 
*/ diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 125567b677..78c4778580 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -31,6 +31,8 @@ #ifndef LP_STATE_H #define LP_STATE_H +#include <llvm-c/Core.h> + #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" @@ -90,6 +92,23 @@ struct lp_vertex_shader { }; +typedef void +(*lp_blend_func)(float src[][4], + float dst[][4], + float con[][4], + float res[][4]); + + +struct lp_blend_state +{ + struct pipe_blend_state base; + + LLVMValueRef function; + + lp_blend_func jit_function; +}; + + void * llvmpipe_create_blend_state(struct pipe_context *, diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 041a54f13e..ed4ea3b330 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -1,5 +1,6 @@ /************************************************************************** * + * Copyright 2009 VMware, Inc. * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. 
* @@ -25,19 +26,126 @@ * **************************************************************************/ -/* Authors: Keith Whitwell +/** + * @author Jose Fonseca + * @author Keith Whitwell */ #include "util/u_memory.h" +#include "util/u_debug_dump.h" +#include "lp_screen.h" #include "lp_context.h" #include "lp_state.h" +#include "lp_bld_type.h" +#include "lp_bld_arit.h" +#include "lp_bld_blend.h" + + +static void +blend_generate(struct llvmpipe_screen *screen, + struct lp_blend_state *blend) +{ + union lp_type type; + LLVMTypeRef vec_type; + LLVMTypeRef args[4]; + LLVMValueRef src_ptr; + LLVMValueRef dst_ptr; + LLVMValueRef const_ptr; + LLVMValueRef res_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef src[4]; + LLVMValueRef dst[4]; + LLVMValueRef con[4]; + LLVMValueRef res[4]; + char src_name[5] = "src?"; + char dst_name[5] = "dst?"; + char con_name[5] = "con?"; + char res_name[5] = "res?"; + unsigned i; + + type.value = 0; + type.floating = TRUE; + type.sign = FALSE; + type.norm = TRUE; + type.width = 32; + type.length = 4; + + vec_type = lp_build_vec_type(type); + + args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); + blend->function = LLVMAddFunction(screen->module, "blend", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); + LLVMSetFunctionCallConv(blend->function, LLVMCCallConv); + src_ptr = LLVMGetParam(blend->function, 0); + dst_ptr = LLVMGetParam(blend->function, 1); + const_ptr = LLVMGetParam(blend->function, 2); + res_ptr = LLVMGetParam(blend->function, 3); + + block = LLVMAppendBasicBlock(blend->function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + con_name[3] = dst_name[3] = src_name[3] = "rgba"[i]; + src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), src_name); + dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), dst_name); + 
con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), con_name); + } + + lp_build_blend_soa(builder, &blend->base, type, src, dst, con, res); + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res_name[3] = "rgba"[i]; + LLVMSetValueName(res[i], res_name); + LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); + } + + LLVMBuildRetVoid(builder);; + + LLVMDisposeBuilder(builder); +} + void * llvmpipe_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) + const struct pipe_blend_state *base) { - return mem_dup(blend, sizeof(*blend)); + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + struct lp_blend_state *blend; + + blend = CALLOC_STRUCT(lp_blend_state); + if(!blend) + return NULL; + + blend->base = *base; + + blend_generate(screen, blend); + + LLVMRunFunctionPassManager(screen->pass, blend->function); + +#if 1 + debug_printf("%s=%s %s=%s %s=%s %s=%s %s=%s %s=%s\n", + "rgb_func", debug_dump_blend_func (blend->base.rgb_func, TRUE), + "rgb_src_factor", debug_dump_blend_factor(blend->base.rgb_src_factor, TRUE), + "rgb_dst_factor", debug_dump_blend_factor(blend->base.rgb_dst_factor, TRUE), + "alpha_func", debug_dump_blend_func (blend->base.alpha_func, TRUE), + "alpha_src_factor", debug_dump_blend_factor(blend->base.alpha_src_factor, TRUE), + "alpha_dst_factor", debug_dump_blend_factor(blend->base.alpha_dst_factor, TRUE)); + LLVMDumpModule(screen->module); + debug_printf("\n"); +#endif + + if(LLVMVerifyFunction(blend->function, LLVMPrintMessageAction)) { + LLVMDumpModule(screen->module); + abort(); + } + + blend->jit_function = (lp_blend_func)LLVMGetPointerToGlobal(screen->engine, blend->function); + + return blend; } void llvmpipe_bind_blend_state( struct pipe_context *pipe, @@ -45,14 +153,23 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - 
llvmpipe->blend = (const struct pipe_blend_state *)blend; + llvmpipe->blend = (struct lp_blend_state *)blend; llvmpipe->dirty |= LP_NEW_BLEND; } void llvmpipe_delete_blend_state(struct pipe_context *pipe, - void *blend) + void *_blend) { + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + struct lp_blend_state *blend = (struct lp_blend_state *)_blend; + + if(blend->function) { + if(blend->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, blend->function); + LLVMDeleteFunction(blend->function); + } + FREE( blend ); } @@ -61,8 +178,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, const struct pipe_blend_color *blend_color ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + unsigned i, j; - llvmpipe->blend_color = *blend_color; + for (i = 0; i < 4; ++i) + for (j = 0; j < QUAD_SIZE; ++j) + llvmpipe->blend_color[i][j] = blend_color->color[i]; llvmpipe->dirty |= LP_NEW_BLEND; } -- cgit v1.2.3