From b642730be93149baa7556e5791393168ab396175 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 17:35:24 +0900 Subject: Code reorganization: move files into their places. This is in a separate commit to ensure renames are properly preserved. --- src/gallium/drivers/softpipe/sp_quad.c | 118 +++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 src/gallium/drivers/softpipe/sp_quad.c (limited to 'src/gallium/drivers/softpipe/sp_quad.c') diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c new file mode 100644 index 0000000000..6bd468a51c --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/p_shader_tokens.h" + +static void +sp_push_quad_first( + struct softpipe_context *sp, + struct quad_stage *quad ) +{ + quad->next = sp->quad.first; + sp->quad.first = quad; +} + +static void +sp_build_depth_stencil( + struct softpipe_context *sp ) +{ + if (sp->depth_stencil->stencil[0].enabled || + sp->depth_stencil->stencil[1].enabled) { + sp_push_quad_first( sp, sp->quad.stencil_test ); + } + else if (sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf) { + sp_push_quad_first( sp, sp->quad.depth_test ); + } +} + +void +sp_build_quad_pipeline(struct softpipe_context *sp) +{ + boolean early_depth_test = + sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf && + !sp->depth_stencil->alpha.enabled && + sp->fs->shader.output_semantic_name[0] != TGSI_SEMANTIC_POSITION; + + /* build up the pipeline in reverse order... */ + + sp->quad.first = sp->quad.output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad.colormask ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad.blend ); + } + + if (sp->framebuffer.num_cbufs == 1) { + /* the usual case: write to exactly one colorbuf */ + sp->current_cbuf = 0; + } + else { + /* insert bufloop stage */ + sp_push_quad_first( sp, sp->quad.bufloop ); + } + + if (sp->depth_stencil->depth.occlusion_count) { + sp_push_quad_first( sp, sp->quad.occlusion ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad.coverage ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad.alpha_test ); + } + + /* XXX always enable shader? */ + if (1) { + sp_push_quad_first( sp, sp->quad.shade ); + } + + if (early_depth_test) { + sp_build_depth_stencil( sp ); + sp_push_quad_first( sp, sp->quad.earlyz ); + } + + if (sp->rasterizer->poly_stipple_enable) { + sp_push_quad_first( sp, sp->quad.polygon_stipple ); + } +} -- cgit v1.2.3 From 446bfc32a83008e0865ec869bc80b920c907f10f Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 21 Feb 2008 16:56:32 -0700 Subject: gallium: new draw stage for polygon stipple. For hardware without native polygon stipple. Create a 32x32 alpha texture that encodes the stipple pattern. Modify the user's fragment program to sample the texture (with gl_FragCoord) and kill the fragment according to the texel value. Temporarily enabled in softpipe driver, replacing the sp_quad_stipple.c step. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_context.h | 3 + src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_pstipple.c | 717 +++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_validate.c | 6 + src/gallium/drivers/softpipe/sp_context.c | 5 + src/gallium/drivers/softpipe/sp_context.h | 7 + src/gallium/drivers/softpipe/sp_quad.c | 2 + 8 files changed, 742 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_pstipple.c (limited to 'src/gallium/drivers/softpipe/sp_quad.c') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index b7e71ed3ec..c9980f0b83 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -16,6 +16,7 @@ C_SOURCES = \ draw_flatshade.c \ draw_offset.c \ draw_prim.c \ + draw_pstipple.c \ draw_stipple.c \ draw_twoside.c \ draw_unfilled.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 43b58bc056..c25301f71d 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -102,6 +102,9 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); void draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); +void +draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); + int draw_find_vs_output(struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 8c469e74b7..6abced139b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -182,6 +182,7 @@ struct draw_context struct draw_stage *stipple; struct draw_stage *aapoint; struct draw_stage *aaline; + struct draw_stage *pstipple; struct draw_stage *wide; struct draw_stage *rasterize; } pipeline; diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c new file mode 100644 index 0000000000..4048abf856 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -0,0 +1,717 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Polygon stipple stage: implement polygon stipple with texture map and + * fragment program. The fragment program samples the texture and does + * a fragment kill for the stipple-failing fragments. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct pstip_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *pstip_fs; +}; + + +/** + * Subclass of draw_stage + */ +struct pstip_stage +{ + struct draw_stage stage; + + void *sampler_cso; + struct pipe_texture *texture; + uint sampler_unit; + + /* + * Currently bound state + */ + struct pstip_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + const struct pipe_poly_stipple *stipple; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + + void (*driver_set_sampler_texture)(struct pipe_context *, + unsigned sampler, + struct pipe_texture *); + + void (*driver_set_polygon_stipple)(struct pipe_context *, + const struct pipe_poly_stipple *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct pstip_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int wincoordInput; + int maxInput; + int maxSampler; /**< max sampler index found */ + int texTemp; /**< temp registers */ + int numImmed; + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +pstip_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + if ((int) decl->u.DeclarationRange.Last > pctx->maxSampler) + pctx->maxSampler = (int) decl->u.DeclarationRange.Last; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + pctx->maxInput = MAX2(pctx->maxInput, decl->u.DeclarationRange.Last); + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + pctx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +static void +pstip_transform_immed(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *immed) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + pctx->numImmed++; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +pstip_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (pctx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction newInst; + uint i; + int wincoordInput; + const int sampler = pctx->maxSampler + 1; + + if (pctx->wincoordInput < 0) + wincoordInput = pctx->maxInput + 1; + else + wincoordInput = pctx->wincoordInput; + + /* find one free temp reg */ + for (i = 0; i < 32; i++) { + if ((pctx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (pctx->texTemp < 0) + pctx->texTemp = i; + else + break; + } + } + assert(pctx->texTemp >= 0); + + if (pctx->wincoordInput < 0) { + /* declare new position input reg */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = wincoordInput; + ctx->emit_declaration(ctx, &decl); + } + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = sampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = pctx->texTemp; + ctx->emit_declaration(ctx, &decl); + + /* emit immediate = {1/32, 1/32, 1, 1} + * The index/position of this immediate will be pctx->numImmed + */ + { + static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 }; + struct tgsi_full_immediate immed; + uint size = 4; + immed = tgsi_default_full_immediate(); + immed.Immediate.Size = 1 + size; /* one for the token itself */ + immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value; + ctx->emit_immediate(ctx, &immed); + } + + pctx->firstInstruction = FALSE; + + + /* + * Insert new MUL/TEX/KILP instructions at start of program + * Take gl_FragCoord, divide by 32 (stipple size), sample the + * texture and kill fragment if needed. + * + * We'd like to use non-normalized texcoords to index into a RECT + * texture, but we can only use GL_REPEAT wrap mode with normalized + * texcoords. Darn. + */ + + /* MUL texTemp, INPUT[wincoord], 1/32; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + ctx->emit_instruction(ctx, &newInst); + + /* TEX texTemp, texTemp, sampler; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = sampler; + ctx->emit_instruction(ctx, &newInst); + + /* KILP texTemp; # if texTemp < 0, KILL fragment */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + } + + /* emit this instruction */ + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for doing polygon stipple. + * This will be the user's shader prefixed with a TEX and KIL instruction. + */ +static void +generate_pstip_fs(struct pstip_stage *pstip) +{ + const struct pipe_shader_state *orig_fs = &pstip->fs->state; + /*struct draw_context *draw = pstip->stage.draw;*/ + struct pipe_shader_state pstip_fs; + struct pstip_transform_context transform; + +#define MAX 1000 + + pstip_fs = *orig_fs; /* copy to init */ + pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.wincoordInput = -1; + transform.maxInput = -1; + transform.maxSampler = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = pstip_transform_inst; + transform.base.transform_declaration = pstip_transform_decl; + transform.base.transform_immediate = pstip_transform_immed; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) pstip_fs.tokens, + MAX, &transform.base); + +#if 1 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(pstip_fs.tokens, 0); +#endif + + pstip->sampler_unit = transform.maxSampler + 1; + + if (transform.wincoordInput < 0) { + pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; + pstip_fs.input_semantic_index[pstip_fs.num_inputs] = transform.maxInput; + pstip_fs.num_inputs++; + } + + pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); +} + + +/** + * Load texture image with current stipple pattern. + */ +static void +pstip_update_texture(struct pstip_stage *pstip) +{ + static const uint bit31 = 1 << 31; + struct pipe_context *pipe = pstip->pipe; + struct pipe_surface *surface; + const uint *stipple = pstip->state.stipple->stipple; + uint i, j; + ubyte *data; + + surface = pipe->get_tex_surface(pipe, pstip->texture, 0, 0, 0); + data = pipe_surface_map(surface); + + /* + * Load alpha texture. + * Note: 0 means keep the fragment, 255 means kill it. + * We'll negate the texel value and use KILP which kills if value + * is negative. + */ + for (i = 0; i < 32; i++) { + for (j = 0; j < 32; j++) { + if (stipple[i] & (bit31 >> j)) { + /* fragment "on" */ + data[i * surface->pitch + j] = 0; + } + else { + /* fragment "off" */ + data[i * surface->pitch + j] = 255; + } + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pstip->texture); +} + + +/** + * Create the texture map we'll use for stippling. + */ +static void +pstip_create_texture(struct pstip_stage *pstip) +{ + struct pipe_context *pipe = pstip->pipe; + struct pipe_texture texTemp; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = 0; + texTemp.width[0] = 32; + texTemp.height[0] = 32; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + pstip->texture = pipe->texture_create(pipe, &texTemp); + + //pstip_update_texture(pstip); +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +pstip_create_sampler(struct pstip_stage *pstip) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = pstip->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = 0.0f; + + pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_pstip_fragment_shader(struct pstip_stage *pstip) +{ + if (!pstip->fs->pstip_fs) { + generate_pstip_fs(pstip); + } + pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); +} + + + +static INLINE struct pstip_stage * +pstip_stage( struct draw_stage *stage ) +{ + return (struct pstip_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + +static void +pstip_first_tri(struct draw_stage *stage, struct prim_header *header) +{ + struct pstip_stage *pstip = pstip_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = pstip->pipe; + + assert(draw->rasterizer->poly_stipple_enable); + + /* + * Bind our fragprog, sampler and texture + */ + bind_pstip_fragment_shader(pstip); + + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture); + + /* now really draw first line */ + stage->tri = passthrough_tri; + stage->tri(stage, header); +} + + +static void +pstip_flush(struct draw_stage *stage, unsigned flags) +{ + /*struct draw_context *draw = stage->draw;*/ + struct pstip_stage *pstip = pstip_stage(stage); + struct pipe_context *pipe = pstip->pipe; + + stage->tri = pstip_first_tri; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, + pstip->state.sampler[pstip->sampler_unit]); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, + pstip->state.texture[pstip->sampler_unit]); +} + + +static void +pstip_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +pstip_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct pstip_stage * +draw_pstip_stage(struct draw_context *draw) +{ + struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); + + draw_alloc_temp_verts( &pstip->stage, 8 ); + + pstip->stage.draw = draw; + pstip->stage.next = NULL; + pstip->stage.point = passthrough_point; + pstip->stage.line = passthrough_line; + pstip->stage.tri = pstip_first_tri; + pstip->stage.flush = pstip_flush; + pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; + pstip->stage.destroy = pstip_destroy; + + return pstip; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a pstip_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct pstip_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_pstip_pipe_context(struct pipe_context *pipe, struct pstip_stage *pstip) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = pstip; + NumContexts++; +} + +static struct pstip_stage * +pstip_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +pstip_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); + } + + return aafs; +} + + +static void +pstip_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* save current */ + pstip->fs = aafs; + /* pass-through */ + pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs); +} + + +static void +pstip_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* pass-through */ + pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +pstip_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.sampler[unit] = sampler; + /* pass-through */ + pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler); +} + + +static void +pstip_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, struct pipe_texture *texture) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.texture[sampler] = texture; + /* pass-through */ + pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture); +} + + +static void +pstip_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.stipple = stipple; + /* pass-through */ + pstip->driver_set_polygon_stipple(pstip->pipe, stipple); + + pstip_update_texture(pstip); +} + + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_pstipple_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct pstip_stage *pstip; + + /* + * Create / install AA line drawing / prim stage + */ + pstip = draw_pstip_stage( draw ); + assert(pstip); + draw->pipeline.pstipple = &pstip->stage; + + pstip->pipe = pipe; + + /* create special texture, sampler state */ + pstip_create_texture(pstip); + pstip_create_sampler(pstip); + + /* save original driver functions */ + pstip->driver_create_fs_state = pipe->create_fs_state; + pstip->driver_bind_fs_state = pipe->bind_fs_state; + pstip->driver_delete_fs_state = pipe->delete_fs_state; + + pstip->driver_bind_sampler_state = pipe->bind_sampler_state; + pstip->driver_set_sampler_texture = pipe->set_sampler_texture; + pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; + + /* override the driver's functions */ + pipe->create_fs_state = pstip_create_fs_state; + pipe->bind_fs_state = pstip_bind_fs_state; + pipe->delete_fs_state = pstip_delete_fs_state; + + pipe->bind_sampler_state = pstip_bind_sampler_state; + pipe->set_sampler_texture = pstip_set_sampler_texture; + pipe->set_polygon_stipple = pstip_set_polygon_stipple; + + add_pstip_pipe_context(pipe, pstip); +} diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 5167ef1e75..efd6793f2b 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -82,6 +82,12 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) precalc_flat = 1; /* only needed for lines really */ } + if (draw->rasterizer->poly_stipple_enable + && draw->pipeline.pstipple) { + draw->pipeline.pstipple->next = next; + next = draw->pipeline.pstipple; + } + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { draw->pipeline.unfilled->next = next; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index de4c6c18e7..2cdf3c75bf 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -332,6 +332,11 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); +#if USE_DRAW_STAGE_PSTIPPLE + /* Do polygon stipple w/ texture map + frag prog? */ + draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + /* sp_prim_setup can do wide points (don't convert to quads) */ draw_convert_wide_points(softpipe->draw, FALSE); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a50cee7648..feeafc7084 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -39,6 +39,13 @@ #include "sp_quad.h" +/** + * This is a temporary variable for testing draw-stage polygon stipple. + * If zero, do stipple in sp_quad_stipple.c + */ +#define USE_DRAW_STAGE_PSTIPPLE 1 + + struct softpipe_winsys; struct softpipe_vbuf_render; struct draw_context; diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 6bd468a51c..15b5594547 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -112,7 +112,9 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp_push_quad_first( sp, sp->quad.earlyz ); } +#if !USE_DRAW_STAGE_PSTIPPLE if (sp->rasterizer->poly_stipple_enable) { sp_push_quad_first( sp, sp->quad.polygon_stipple ); } +#endif } -- cgit v1.2.3 From 012391357fcbefd2b34e999eed91a129d5efd77c Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 23 Feb 2008 16:17:17 -0700 Subject: gallium: disable early Z test if fragment shader contains KIL instruction. Use tgsi_scan_shader() to determine if the fragment shader uses KIL or writes fragment.z --- src/gallium/drivers/softpipe/sp_quad.c | 5 +++-- src/gallium/drivers/softpipe/sp_state.h | 3 +++ src/gallium/drivers/softpipe/sp_state_fs.c | 22 +++++++++++++--------- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad.c') diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 15b5594547..142dbcc771 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -56,11 +56,12 @@ sp_build_depth_stencil( void sp_build_quad_pipeline(struct softpipe_context *sp) { - boolean early_depth_test = + boolean early_depth_test = sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - sp->fs->shader.output_semantic_name[0] != TGSI_SEMANTIC_POSITION; + !sp->fs->uses_kill && + !sp->fs->writes_z; /* build up the pipeline in reverse order... */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 6ff31e601f..5aaa9e346b 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -63,6 +63,9 @@ struct tgsi_exec_machine; struct sp_fragment_shader { struct pipe_shader_state shader; + boolean uses_kill; + boolean writes_z; + void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b0238f8173..b184ac61bb 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -36,6 +36,7 @@ #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_scan.h" void * @@ -44,21 +45,24 @@ softpipe_create_fs_state(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state; + struct tgsi_shader_info info; + + tgsi_scan_shader(templ->tokens, &info); if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); state = softpipe_create_fs_llvm( softpipe, templ ); - if (state) - return state; - - state = softpipe_create_fs_sse( softpipe, templ ); - if (state) - return state; - - state = softpipe_create_fs_exec( softpipe, templ ); - + if (!state) { + state = softpipe_create_fs_sse( softpipe, templ ); + if (!state) { + state = softpipe_create_fs_exec( softpipe, templ ); + } + } assert(state); + state->uses_kill = (info.opcode_count[TGSI_OPCODE_KIL] || + info.opcode_count[TGSI_OPCODE_KILP]); + state->writes_z = info.writes_z; return state; } -- cgit v1.2.3 From f74279002a0ae0b106bd5410487ef9c0e9b1d8b6 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 26 Feb 2008 10:13:39 -0700 Subject: gallium: added tgsi_shader_field to sp_fragment_shader Use the shader semantic info from there, instead of from pipe_shader_state. Carry this idea to draw module and other drivers... --- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 18 +++++++++--------- src/gallium/drivers/softpipe/sp_quad.c | 2 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 4 ++-- src/gallium/drivers/softpipe/sp_state.h | 6 ++++-- src/gallium/drivers/softpipe/sp_state_derived.c | 9 +++++---- src/gallium/drivers/softpipe/sp_state_fs.c | 16 ++++++++++------ 7 files changed, 32 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad.c') diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 34b2b7d4e2..07d058155f 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -96,7 +96,7 @@ shade_quad_llvm(struct quad_stage *qs, if (qss->colorOutSlot >= 0) { unsigned i; /* XXX need to handle multiple color outputs someday */ - allvmrt(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); for (i = 0; i < QUAD_SIZE; ++i) { quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0]; diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index b6a3fddb29..17284539b0 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -514,7 +514,7 @@ setup_fragcoord_coeff(struct setup_stage *setup, uint slot) static void setup_tri_coefficients( struct setup_stage *setup ) { struct softpipe_context *softpipe = setup->softpipe; - const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; @@ -525,7 +525,7 @@ static void setup_tri_coefficients( struct setup_stage *setup ) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { const uint vertSlot = vinfo->src_index[fragSlot]; uint j; @@ -549,7 +549,7 @@ static void setup_tri_coefficients( struct setup_stage *setup ) assert(0); } - if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; setup->coef[fragSlot].dadx[1] = 0.0; @@ -757,7 +757,7 @@ static INLINE void setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) { struct softpipe_context *softpipe = setup->softpipe; - const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; @@ -779,7 +779,7 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { const uint vertSlot = vinfo->src_index[fragSlot]; uint j; @@ -803,7 +803,7 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) assert(0); } - if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; setup->coef[fragSlot].dadx[1] = 0.0; @@ -967,7 +967,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); struct softpipe_context *softpipe = setup->softpipe; - const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_header *v0 = prim->v[0]; const int sizeAttr = setup->softpipe->psize_slot; const float size @@ -1002,7 +1002,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) const_coeff(setup, &setup->posCoef, 0, 2); const_coeff(setup, &setup->posCoef, 0, 3); - for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { const uint vertSlot = vinfo->src_index[fragSlot]; uint j; @@ -1025,7 +1025,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) assert(0); } - if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; setup->coef[fragSlot].dadx[1] = 0.0; diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 142dbcc771..0aaf94021f 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -61,7 +61,7 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && !sp->fs->uses_kill && - !sp->fs->writes_z; + !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 2f40e09d5c..1794fb5a61 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -91,7 +91,7 @@ shade_quad( /* store result color */ if (qss->colorOutSlot >= 0) { /* XXX need to handle multiple color outputs someday */ - assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); memcpy( quad->outputs.color, @@ -149,7 +149,7 @@ static void shade_begin(struct quad_stage *qs) qss->colorOutSlot = -1; qss->depthOutSlot = -1; for (i = 0; i < qss->stage.softpipe->fs->shader.num_outputs; i++) { - switch (qss->stage.softpipe->fs->shader.output_semantic_name[i]) { + switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: qss->depthOutSlot = i; break; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 5aaa9e346b..b1070e185a 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -32,6 +32,7 @@ #define SP_STATE_H #include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" #define SP_NEW_VIEWPORT 0x1 @@ -61,10 +62,11 @@ struct tgsi_exec_machine; * This is starting to look an awful lot like a quad pipeline stage... */ struct sp_fragment_shader { - struct pipe_shader_state shader; + struct pipe_shader_state shader; + + struct tgsi_shader_info info; boolean uses_kill; - boolean writes_z; void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 4c6313001f..68702eafcf 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -62,6 +62,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ const struct pipe_shader_state *vs = &softpipe->vs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct pipe_shader_state *fs = &softpipe->fs->shader; const enum interp_mode colorInterp = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; @@ -83,9 +84,9 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * from the vertex shader. */ vinfo->num_attribs = 0; - for (i = 0; i < fs->num_inputs; i++) { + for (i = 0; i < spfs->info.num_inputs; i++) { int src; - switch (fs->input_semantic_name[i]) { + switch (spfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_POSITION, 0); @@ -94,7 +95,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) case TGSI_SEMANTIC_COLOR: src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, - fs->input_semantic_index[i]); + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; @@ -106,7 +107,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, - fs->input_semantic_index[i]); + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b184ac61bb..2715dca0f0 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -45,13 +45,12 @@ softpipe_create_fs_state(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state; - struct tgsi_shader_info info; - - tgsi_scan_shader(templ->tokens, &info); + /* debug */ if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); + /* codegen */ state = softpipe_create_fs_llvm( softpipe, templ ); if (!state) { state = softpipe_create_fs_sse( softpipe, templ ); @@ -59,10 +58,15 @@ softpipe_create_fs_state(struct pipe_context *pipe, state = softpipe_create_fs_exec( softpipe, templ ); } } + assert(state); - state->uses_kill = (info.opcode_count[TGSI_OPCODE_KIL] || - info.opcode_count[TGSI_OPCODE_KILP]); - state->writes_z = info.writes_z; + + /* get/save the summary info for this shader */ + tgsi_scan_shader(templ->tokens, &state->info); + + /* convenience field */ + state->uses_kill = (state->info.opcode_count[TGSI_OPCODE_KIL] || + state->info.opcode_count[TGSI_OPCODE_KILP]); return state; } -- cgit v1.2.3 From c66ec5c7a2966df3e3456dfca3eb17c294b30dd5 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 Feb 2008 15:06:55 -0700 Subject: gallium: remove uses_kill field from softpipe_shader --- src/gallium/drivers/softpipe/sp_quad.c | 2 +- src/gallium/drivers/softpipe/sp_state.h | 2 -- src/gallium/drivers/softpipe/sp_state_fs.c | 3 --- 3 files changed, 1 insertion(+), 6 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad.c') diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 0aaf94021f..8603c1a367 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -60,7 +60,7 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - !sp->fs->uses_kill && + !sp->fs->info.uses_kill && !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index b1070e185a..895976467c 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -66,8 +66,6 @@ struct sp_fragment_shader { struct tgsi_shader_info info; - boolean uses_kill; - void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 2715dca0f0..eb641ed321 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -64,9 +64,6 @@ softpipe_create_fs_state(struct pipe_context *pipe, /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &state->info); - /* convenience field */ - state->uses_kill = (state->info.opcode_count[TGSI_OPCODE_KIL] || - state->info.opcode_count[TGSI_OPCODE_KILP]); return state; } -- cgit v1.2.3 From c7daa68ca312cc98abe351be2fef8d8246929627 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 7 Apr 2008 21:59:12 -0600 Subject: gallium: begin reworking quad stages for multiple color outputs --- src/gallium/drivers/softpipe/sp_context.h | 2 - src/gallium/drivers/softpipe/sp_fs_llvm.c | 16 +- src/gallium/drivers/softpipe/sp_headers.h | 4 +- src/gallium/drivers/softpipe/sp_quad.c | 9 - src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 5 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 1192 +++++++++++---------- src/gallium/drivers/softpipe/sp_quad_bufloop.c | 4 +- src/gallium/drivers/softpipe/sp_quad_colormask.c | 59 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 15 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 60 +- src/gallium/drivers/softpipe/sp_quad_output.c | 39 +- 11 files changed, 734 insertions(+), 671 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad.c') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index dc9d0e6d5d..9724d185d1 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -138,8 +138,6 @@ struct softpipe_context { struct draw_stage *vbuf; struct softpipe_vbuf_render *vbuf_render; - uint current_cbuf; /**< current color buffer being written to */ - struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; struct softpipe_tile_cache *zsbuf_cache; diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 07d058155f..6e1d9280bb 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -99,19 +99,19 @@ shade_quad_llvm(struct quad_stage *qs, allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); for (i = 0; i < QUAD_SIZE; ++i) { - quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0]; - quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1]; - quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2]; - quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3]; + quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0]; + quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1]; + quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2]; + quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3]; } } #if DLLVM for (int i = 0; i < QUAD_SIZE; ++i) { debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, - quad->outputs.color[0][i], - quad->outputs.color[1][i], - quad->outputs.color[2][i], - quad->outputs.color[3][i]); + quad->outputs.color[0][0][i], + quad->outputs.color[0][1][i], + quad->outputs.color[0][2][i], + quad->outputs.color[0][3][i]); } #endif diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index 9cf8222133..3d9ede69bb 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -31,6 +31,7 @@ #ifndef SP_HEADERS_H #define SP_HEADERS_H +#include "pipe/p_state.h" #include "tgsi/exec/tgsi_exec.h" #define PRIM_POINT 1 @@ -66,7 +67,8 @@ struct quad_header { unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ struct { - float color[NUM_CHANNELS][QUAD_SIZE]; /* rrrr, gggg, bbbb, aaaa */ + /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ + float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; float depth[QUAD_SIZE]; } outputs; diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 8603c1a367..bc83d78ea1 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -76,15 +76,6 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp_push_quad_first( sp, sp->quad.blend ); } - if (sp->framebuffer.num_cbufs == 1) { - /* the usual case: write to exactly one colorbuf */ - sp->current_cbuf = 0; - } - else { - /* insert bufloop stage */ - sp_push_quad_first( sp, sp->quad.bufloop ); - } - if (sp->depth_stencil->depth.occlusion_count) { sp_push_quad_first( sp, sp->quad.occlusion ); } diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 318916fe30..7a42b08ef5 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -16,7 +16,8 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_context *softpipe = qs->softpipe; const float ref = softpipe->depth_stencil->alpha.ref; unsigned passMask = 0x0, j; - const float *aaaa = quad->outputs.color[3]; + const uint cbuf = 0; /* only output[0].alpha is tested */ + const float *aaaa = quad->outputs.color[cbuf][3]; switch (softpipe->depth_stencil->alpha.func) { case PIPE_FUNC_NEVER: @@ -25,7 +26,7 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) case PIPE_FUNC_LESS: /* * If mask were an array [4] we could do this SIMD-style: - * passMask = (quad->outputs.color[3] <= vec4(ref)); + * passMask = (quad->outputs.color[0][3] <= vec4(ref)); */ for (j = 0; j < QUAD_SIZE; j++) { if (aaaa[j] < ref) { diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 0b79cfa1ed..8be8025f40 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -101,114 +101,119 @@ static void logicop_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - float dest[4][QUAD_SIZE]; - ubyte src[4][4], dst[4][4], res[4][4]; - uint *src4 = (uint *) src; - uint *dst4 = (uint *) dst; - uint *res4 = (uint *) res; - struct softpipe_cached_tile * - tile = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[softpipe->current_cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color; - uint i, j; - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float dest[4][QUAD_SIZE]; + ubyte src[4][4], dst[4][4], res[4][4]; + uint *src4 = (uint *) src; + uint *dst4 = (uint *) dst; + uint *res4 = (uint *) res; + struct softpipe_cached_tile * + tile = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color[cbuf]; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - } - /* convert to ubyte */ - for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */ - - UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */ - } + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */ + + UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */ + } - switch (softpipe->blend->logicop_func) { - case PIPE_LOGICOP_CLEAR: - for (j = 0; j < 4; j++) - res4[j] = 0; - break; - case PIPE_LOGICOP_NOR: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] | dst4[j]); - break; - case PIPE_LOGICOP_AND_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_COPY_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j]; - break; - case PIPE_LOGICOP_AND_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & ~dst4[j]; - break; - case PIPE_LOGICOP_INVERT: - for (j = 0; j < 4; j++) - res4[j] = ~dst4[j]; - break; - case PIPE_LOGICOP_XOR: - for (j = 0; j < 4; j++) - res4[j] = dst4[j] ^ src4[j]; - break; - case PIPE_LOGICOP_NAND: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] & dst4[j]); - break; - case PIPE_LOGICOP_AND: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_EQUIV: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] ^ dst4[j]); - break; - case PIPE_LOGICOP_NOOP: - for (j = 0; j < 4; j++) - res4[j] = dst4[j]; - break; - case PIPE_LOGICOP_OR_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_COPY: - for (j = 0; j < 4; j++) - res4[j] = src4[j]; - break; - case PIPE_LOGICOP_OR_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | ~dst4[j]; - break; - case PIPE_LOGICOP_OR: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_SET: - for (j = 0; j < 4; j++) - res4[j] = ~0; - break; - default: - assert(0); - } + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = ~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } - for (j = 0; j < 4; j++) { - quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]); - quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]); - quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]); - quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]); + for (j = 0; j < 4; j++) { + quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]); + quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]); + quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]); + quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]); + } } /* pass quad to next stage */ @@ -221,504 +226,511 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) static void blend_quad(struct quad_stage *qs, struct quad_header *quad) { - struct softpipe_context *softpipe = qs->softpipe; static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; - float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[softpipe->current_cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color; - uint i, j; - - if (softpipe->blend->logicop_enable) { - logicop_quad(qs, quad); - return; - } - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color[cbuf]; + uint i, j; + + if (softpipe->blend->logicop_enable) { + logicop_quad(qs, quad); + return; } - } - /* - * Compute src/first term RGB - */ - switch (softpipe->blend->rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[0], quadColor[0]); /* R */ - VEC4_COPY(source[1], quadColor[1]); /* G */ - VEC4_COPY(source[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - { - const float *alpha = dest[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(source[0], alpha, diff); /* R */ - VEC4_MIN(source[1], alpha, diff); /* G */ - VEC4_MIN(source[2], alpha, diff); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], comp); /* R */ - VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], comp); /* G */ - VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float alpha[4]; - VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[0], zero); /* R */ - VEC4_COPY(source[1], zero); /* G */ - VEC4_COPY(source[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(source[0], quadColor[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(source[1], quadColor[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(source[2], quadColor[2], inv_comp); - } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_alpha[4]; - VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - assert(0); /* to do */ - break; - default: - assert(0); - } - - /* - * Compute src/first term A - */ - switch (softpipe->blend->alpha_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[3], quadColor[3], alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(source[3], alpha, diff); /* A */ - } - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(source[3], quadColor[3], comp); /* A */ + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - /* A */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(source[3], quadColor[3], inv_comp); - } - break; - default: - assert(0); - } - - - /* - * Compute dest/second term RGB - */ - switch (softpipe->blend->rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(dest[0], dest[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(dest[1], dest[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(dest[2], dest[2], inv_comp); + + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + { + const float *alpha = dest[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(source[0], alpha, diff); /* R */ + VEC4_MIN(source[1], alpha, diff); /* G */ + VEC4_MIN(source[2], alpha, diff); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float alpha[4]; + VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(source[0], quadColor[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(source[1], quadColor[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(source[2], quadColor[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_alpha[4]; + VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[0], dest[0], inv_comp); - VEC4_MUL(dest[1], dest[1], inv_comp); - VEC4_MUL(dest[2], dest[2], inv_comp); + + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(source[3], alpha, diff); /* A */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(source[3], quadColor[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + /* A */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[3], quadColor[3], inv_comp); + } + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - default: - assert(0); - } - - /* - * Compute dest/second term A - */ - switch (softpipe->blend->alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[3], dest[3], comp); /* A */ + + + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[3]); /* A */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(dest[0], dest[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(dest[1], dest[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[3], dest[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[3], dest[3], inv_comp); + } + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[3], dest[3], inv_comp); + + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + assert(0); } - break; - default: - assert(0); - } - - /* - * Combine RGB terms - */ - switch (softpipe->blend->rgb_func) { - case PIPE_BLEND_ADD: - VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ - break; - default: - assert(0); - } - - /* - * Combine A terms - */ - switch (softpipe->blend->alpha_func) { - case PIPE_BLEND_ADD: - VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ - break; - default: - assert(0); - } - + + } /* cbuf loop */ + /* pass blended quad to next stage */ qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c index 2ae4e22a7d..b3db428ef1 100644 --- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -13,7 +13,7 @@ static void cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - float tmp[4][QUAD_SIZE]; + float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE]; unsigned i; assert(sizeof(quad->outputs.color) == sizeof(tmp)); @@ -30,7 +30,9 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { /* set current cbuffer */ +#if 0 /* obsolete & going away */ softpipe->current_cbuf = i; +#endif /* pass blended quad to next stage */ qs->next->run(qs->next, quad); diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index 1f09d900ca..7fe080990b 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -47,38 +47,43 @@ static void colormask_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - float dest[4][QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[softpipe->current_cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color; - uint i, j; - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color[cbuf]; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - } - /* R */ - if (!(softpipe->blend->colormask & PIPE_MASK_R)) - COPY_4V(quadColor[0], dest[0]); + /* R */ + if (!(softpipe->blend->colormask & PIPE_MASK_R)) + COPY_4V(quadColor[0], dest[0]); - /* G */ - if (!(softpipe->blend->colormask & PIPE_MASK_G)) - COPY_4V(quadColor[1], dest[1]); + /* G */ + if (!(softpipe->blend->colormask & PIPE_MASK_G)) + COPY_4V(quadColor[1], dest[1]); - /* B */ - if (!(softpipe->blend->colormask & PIPE_MASK_B)) - COPY_4V(quadColor[2], dest[2]); + /* B */ + if (!(softpipe->blend->colormask & PIPE_MASK_B)) + COPY_4V(quadColor[2], dest[2]); - /* A */ - if (!(softpipe->blend->colormask & PIPE_MASK_A)) - COPY_4V(quadColor[3], dest[3]); + /* A */ + if (!(softpipe->blend->colormask & PIPE_MASK_A)) + COPY_4V(quadColor[3], dest[3]); + } /* pass quad to next stage */ qs->next->run(qs->next, quad); diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index b3d3fae22f..dd5ebb2296 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -50,12 +50,17 @@ coverage_quad(struct quad_stage *qs, struct quad_header *quad) if ((softpipe->rasterizer->poly_smooth && quad->prim == PRIM_TRI) || (softpipe->rasterizer->line_smooth && quad->prim == PRIM_LINE) || (softpipe->rasterizer->point_smooth && quad->prim == PRIM_POINT)) { - float (*quadColor)[4] = quad->outputs.color; - unsigned j; - for (j = 0; j < QUAD_SIZE; j++) { - assert(quad->coverage[j] >= 0.0); - assert(quad->coverage[j] <= 1.0); + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float (*quadColor)[4] = quad->outputs.color[cbuf]; + unsigned j; + for (j = 0; j < QUAD_SIZE; j++) { + assert(quad->coverage[j] >= 0.0); + assert(quad->coverage[j] <= 1.0); quadColor[3][j] *= quad->coverage[j]; + } } } diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index a73df31383..9a20c056a2 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -88,22 +88,64 @@ shade_quad( &qss->machine, quad ); - /* store result color */ +#if 0 /* XXX multi color outputs - untested */ + /* store outputs */ + boolean z_written = FALSE; + { + const ubyte *sem_name = softpipe->fs->info.output_semantic_name; + const ubyte *sem_index = softpipe->fs->info.output_semantic_index; + const uint n = qss->stage.softpipe->fs->info.num_outputs; + uint i; + for (i = 0; i < n; i++) { + switch (sem_name[i]) { + case TGSI_SEMANTIC_COLOR: + { + uint cbuf = sem_index[i]; + memcpy(quad->outputs.color[cbuf], + &machine->Outputs[i].xyzw[0].f[0], + sizeof(quad->outputs.color[0]) ); + } + break; + case TGSI_SEMANTIC_POSITION: + { + uint j; + for (j = 0; j < 4; j++) { + quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + } + z_written = TRUE; + } + break; + } + } + } + + if (!z_written) { + /* compute Z values now, as in the quad earlyz stage */ + /* XXX we should really only do this if the earlyz stage is not used */ + const float fx = (float) quad->x0; + const float fy = (float) quad->y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->outputs.depth[0] = z0; + quad->outputs.depth[1] = z0 + dzdx; + quad->outputs.depth[2] = z0 + dzdy; + quad->outputs.depth[3] = z0 + dzdx + dzdy; + } +#endif + + /* store result color(s) */ if (qss->colorOutSlot >= 0) { /* XXX need to handle multiple color outputs someday */ - assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] + assert(softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); memcpy( - quad->outputs.color, + quad->outputs.color[0], &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], - sizeof( quad->outputs.color ) ); + sizeof( quad->outputs.color[0] ) ); } - /* - * XXX the following code for updating quad->outputs.depth - * isn't really needed if we did early z testing. - */ - /* store result Z */ if (qss->depthOutSlot >= 0) { /* output[slot] is new Z */ diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index cfe8f11808..40083138a4 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -34,31 +34,36 @@ /** - * Write quad to framebuffer, taking mask into account. - * - * Note that surfaces support only full quad reads and writes. + * Last step of quad processing: write quad colors to the framebuffer, + * taking mask into account. */ static void output_quad(struct quad_stage *qs, struct quad_header *quad) { - struct softpipe_context *softpipe = qs->softpipe; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[softpipe->current_cbuf], - quad->x0, quad->y0); /* in-tile pos: */ const int itx = quad->x0 % TILE_SIZE; const int ity = quad->y0 % TILE_SIZE; - float (*quadColor)[4] = quad->outputs.color; - int i, j; - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color[cbuf]; + int i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } } } } -- cgit v1.2.3 From ebe6160d7c9ccbddd8b1cc4b0e25b3d61c54293d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 5 Sep 2008 23:21:08 +0200 Subject: softpipe: First attempts at multithreaded softpipe. Configured for 2 cores. --- src/gallium/drivers/softpipe/sp_context.c | 49 ++++---- src/gallium/drivers/softpipe/sp_context.h | 10 +- src/gallium/drivers/softpipe/sp_quad.c | 102 ++++++++------- src/gallium/drivers/softpipe/sp_setup.c | 200 +++++++++++++++++++++++++----- 4 files changed, 254 insertions(+), 107 deletions(-) (limited to 'src/gallium/drivers/softpipe/sp_quad.c') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 6f12390cf7..cd1e6663d8 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -92,17 +92,19 @@ static void softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); - softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); - softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); - softpipe->quad.coverage->destroy( softpipe->quad.coverage ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); - softpipe->quad.colormask->destroy( softpipe->quad.colormask ); - softpipe->quad.output->destroy( softpipe->quad.output ); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple ); + softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz ); + softpipe->quad[i].shade->destroy( softpipe->quad[i].shade ); + softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test ); + softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test ); + softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test ); + softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion ); + softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage ); + softpipe->quad[i].blend->destroy( softpipe->quad[i].blend ); + softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask ); + softpipe->quad[i].output->destroy( softpipe->quad[i].output ); + } for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) sp_destroy_tile_cache(softpipe->cbuf_cache[i]); @@ -205,17 +207,19 @@ softpipe_create( struct pipe_screen *screen, /* setup quad rendering stages */ - softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); - softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); - softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad.blend = sp_quad_blend_stage(softpipe); - softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad.output = sp_quad_output_stage(softpipe); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad[i].shade = sp_quad_shade_stage(softpipe); + softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad[i].blend = sp_quad_blend_stage(softpipe); + softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad[i].output = sp_quad_output_stage(softpipe); + } /* * Create drawing context and plug our rendering stage into it. @@ -257,3 +261,4 @@ softpipe_create( struct pipe_screen *screen, softpipe_destroy(&softpipe->pipe); return NULL; } + diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 078886f93c..e58f7c8b6f 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -45,6 +45,10 @@ */ #define USE_DRAW_STAGE_PSTIPPLE 1 +/* Number of threads working on individual quads. + * Setting to 1 disables this feature. + */ +#define SP_NUM_QUAD_THREADS 2 struct softpipe_winsys; struct softpipe_vbuf_render; @@ -133,7 +137,7 @@ struct softpipe_context { struct quad_stage *output; struct quad_stage *first; /**< points to one of the above stages */ - } quad; + } quad[SP_NUM_QUAD_THREADS]; /** The primitive drawing context */ struct draw_context *draw; @@ -151,13 +155,11 @@ struct softpipe_context { }; - - static INLINE struct softpipe_context * softpipe_context( struct pipe_context *pipe ) { return (struct softpipe_context *)pipe; } - #endif /* SP_CONTEXT_H */ + diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index bc83d78ea1..892ef87ee9 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -33,29 +33,33 @@ static void sp_push_quad_first( struct softpipe_context *sp, - struct quad_stage *quad ) + struct quad_stage *quad, + uint i ) { - quad->next = sp->quad.first; - sp->quad.first = quad; + quad->next = sp->quad[i].first; + sp->quad[i].first = quad; } static void sp_build_depth_stencil( - struct softpipe_context *sp ) + struct softpipe_context *sp, + uint i ) { if (sp->depth_stencil->stencil[0].enabled || sp->depth_stencil->stencil[1].enabled) { - sp_push_quad_first( sp, sp->quad.stencil_test ); + sp_push_quad_first( sp, sp->quad[i].stencil_test, i ); } else if (sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf) { - sp_push_quad_first( sp, sp->quad.depth_test ); + sp_push_quad_first( sp, sp->quad[i].depth_test, i ); } } void sp_build_quad_pipeline(struct softpipe_context *sp) { + uint i; + boolean early_depth_test = sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && @@ -64,49 +68,51 @@ sp_build_quad_pipeline(struct softpipe_context *sp) !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ - - sp->quad.first = sp->quad.output; - - if (sp->blend->colormask != 0xf) { - sp_push_quad_first( sp, sp->quad.colormask ); - } - - if (sp->blend->blend_enable || - sp->blend->logicop_enable) { - sp_push_quad_first( sp, sp->quad.blend ); - } - - if (sp->depth_stencil->depth.occlusion_count) { - sp_push_quad_first( sp, sp->quad.occlusion ); - } - - if (sp->rasterizer->poly_smooth || - sp->rasterizer->line_smooth || - sp->rasterizer->point_smooth) { - sp_push_quad_first( sp, sp->quad.coverage ); - } - - if (!early_depth_test) { - sp_build_depth_stencil( sp ); - } - - if (sp->depth_stencil->alpha.enabled) { - sp_push_quad_first( sp, sp->quad.alpha_test ); - } - - /* XXX always enable shader? */ - if (1) { - sp_push_quad_first( sp, sp->quad.shade ); - } - - if (early_depth_test) { - sp_build_depth_stencil( sp ); - sp_push_quad_first( sp, sp->quad.earlyz ); - } + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first = sp->quad[i].output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad[i].colormask, i ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad[i].blend, i ); + } + + if (sp->depth_stencil->depth.occlusion_count) { + sp_push_quad_first( sp, sp->quad[i].occlusion, i ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad[i].coverage, i ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp, i ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad[i].alpha_test, i ); + } + + /* XXX always enable shader? */ + if (1) { + sp_push_quad_first( sp, sp->quad[i].shade, i ); + } + + if (early_depth_test) { + sp_build_depth_stencil( sp, i ); + sp_push_quad_first( sp, sp->quad[i].earlyz, i ); + } #if !USE_DRAW_STAGE_PSTIPPLE - if (sp->rasterizer->poly_stipple_enable) { - sp_push_quad_first( sp, sp->quad.polygon_stipple ); - } + if (sp->rasterizer->poly_stipple_enable) { + sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i ); + } #endif + } } + diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 87336ab6e3..706a412baf 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -43,6 +43,7 @@ #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -61,6 +62,51 @@ struct edge { int lines; /**< number of lines on this edge */ }; +#if SP_NUM_QUAD_THREADS > 1 + +struct thread_info +{ + struct setup_context *setup; + uint id; + pipe_thread handle; +}; + +struct quad_job; + +typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job ); + +struct quad_job +{ + int x, y; + unsigned mask; + struct quad_header quad; + quad_job_routine routine; +}; + +#define NUM_QUAD_JOBS 64 + +struct quad_job_que +{ + struct quad_job jobs[NUM_QUAD_JOBS]; + uint first; + uint last; + pipe_mutex mutex; +}; + +static void +add_quad_job( struct quad_job_que *que, int x, int y, unsigned mask, struct quad_header *quad, quad_job_routine routine ) +{ + while ((que->last + 1) % NUM_QUAD_JOBS == que->first) + usleep( 10 ); + que->jobs[que->last].x = x; + que->jobs[que->last].y = y; + que->jobs[que->last].mask = mask; + que->jobs[que->last].quad = *quad; + que->jobs[que->last].routine = routine; + que->last = (que->last + 1) % NUM_QUAD_JOBS; +} + +#endif /** * Triangle setup info (derived from draw_stage). @@ -88,6 +134,12 @@ struct setup_context { struct tgsi_interp_coef posCoef; /* For Z, W */ struct quad_header quad; +#if SP_NUM_QUAD_THREADS > 1 + struct quad_job_que que; + struct thread_info threads[SP_NUM_QUAD_THREADS]; + +#endif + struct { int left[2]; /**< [0] = row0, [1] = row1 */ int right[2]; @@ -104,7 +156,36 @@ struct setup_context { unsigned winding; /* which winding to cull */ }; +#if SP_NUM_QUAD_THREADS > 1 +static PIPE_THREAD_ROUTINE( quad_thread, param ) +{ + struct thread_info *info = (struct thread_info *) param; + + for (;;) { + struct quad_job *job; + + while (info->setup->que.last == info->setup->que.first) + usleep( 10 ); + pipe_mutex_lock( info->setup->que.mutex ); + job = &info->setup->que.jobs[info->setup->que.first]; + info->setup->que.first = (info->setup->que.first + 1) % NUM_QUAD_JOBS; + job->routine( info->setup, info->id, job ); + pipe_mutex_unlock( info->setup->que.mutex ); + } +} + +#define WAIT_FOR_COMPLETION(setup) \ + do {\ + while (setup->que.last != setup->que.first)\ + usleep( 10 );\ + } while (0) + +#else + +#define WAIT_FOR_COMPLETION(setup) ((void) 0) + +#endif /** * Test if x is NaN or +/- infinity. @@ -143,7 +224,7 @@ static boolean cull_tri( struct setup_context *setup, * Clip setup->quad against the scissor/surface bounds. */ static INLINE void -quad_clip(struct setup_context *setup) +quad_clip( struct setup_context *setup, struct quad_header *quad ) { const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; const int minx = (int) cliprect->minx; @@ -151,22 +232,22 @@ quad_clip(struct setup_context *setup) const int miny = (int) cliprect->miny; const int maxy = (int) cliprect->maxy; - if (setup->quad.x0 >= maxx || - setup->quad.y0 >= maxy || - setup->quad.x0 + 1 < minx || - setup->quad.y0 + 1 < miny) { + if (quad->x0 >= maxx || + quad->y0 >= maxy || + quad->x0 + 1 < minx || + quad->y0 + 1 < miny) { /* totally clipped */ - setup->quad.mask = 0x0; + quad->mask = 0x0; return; } - if (setup->quad.x0 < minx) - setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup->quad.y0 < miny) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup->quad.x0 == maxx - 1) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup->quad.y0 == maxy - 1) - setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + if (quad->x0 < minx) + quad->mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (quad->y0 < miny) + quad->mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (quad->x0 == maxx - 1) + quad->mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (quad->y0 == maxy - 1) + quad->mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); } @@ -174,35 +255,52 @@ quad_clip(struct setup_context *setup) * Emit a quad (pass to next stage) with clipping. */ static INLINE void -clip_emit_quad(struct setup_context *setup) +clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) { - quad_clip(setup); - if (setup->quad.mask) { + quad_clip( setup, quad ); + if (quad->mask) { struct softpipe_context *sp = setup->softpipe; - sp->quad.first->run(sp->quad.first, &setup->quad); + + sp->quad[thread].first->run( sp->quad[thread].first, quad ); } } +#if SP_NUM_QUAD_THREADS > 1 + +static void +clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + clip_emit_quad( setup, &job->quad, thread ); +} + +#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, 0, 0, 0, &setup->quad, clip_emit_quad_job ) + +#else + +#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 ) + +#endif /** * Emit a quad (pass to next stage). No clipping is done. */ static INLINE void -emit_quad( struct setup_context *setup, int x, int y, unsigned mask ) +emit_quad( struct setup_context *setup, int x, int y, unsigned mask, struct quad_header *quad, uint thread ) { struct softpipe_context *sp = setup->softpipe; - setup->quad.x0 = x; - setup->quad.y0 = y; - setup->quad.mask = mask; + + quad->x0 = x; + quad->y0 = y; + quad->mask = mask; #if DEBUG_FRAGS if (mask & 1) setup->numFragsEmitted++; if (mask & 2) setup->numFragsEmitted++; if (mask & 4) setup->numFragsEmitted++; if (mask & 8) setup->numFragsEmitted++; #endif - sp->quad.first->run(sp->quad.first, &setup->quad); + sp->quad[thread].first->run( sp->quad[thread].first, quad ); #if DEBUG_FRAGS - mask = setup->quad.mask; + mask = quad->mask; if (mask & 1) setup->numFragsWritten++; if (mask & 2) setup->numFragsWritten++; if (mask & 4) setup->numFragsWritten++; @@ -210,6 +308,21 @@ emit_quad( struct setup_context *setup, int x, int y, unsigned mask ) #endif } +#if SP_NUM_QUAD_THREADS > 1 + +static void +emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + emit_quad( setup, job->x, job->y, job->mask, &job->quad, thread ); +} + +#define EMIT_QUAD(setup,x,y,mask) add_quad_job( &setup->que, x, y, mask, &setup->quad, emit_quad_job ) + +#else + +#define EMIT_QUAD(setup,x,y,mask) emit_quad( setup, x, y, mask, &setup->quad, 0 ) + +#endif /** * Given an X or Y coordinate, return the block/quad coordinate that it @@ -249,7 +362,7 @@ static void flush_spans( struct setup_context *setup ) mask |= MASK_TOP_RIGHT; if (x+1 >= xleft1 && x+1 < xright1) mask |= MASK_BOTTOM_RIGHT; - emit_quad( setup, x, setup->span.y, mask ); + EMIT_QUAD( setup, x, setup->span.y, mask ); } break; @@ -263,7 +376,7 @@ static void flush_spans( struct setup_context *setup ) mask |= MASK_TOP_LEFT; if (x+1 >= xleft0 && x+1 < xright0) mask |= MASK_TOP_RIGHT; - emit_quad( setup, x, setup->span.y, mask ); + EMIT_QUAD( setup, x, setup->span.y, mask ); } break; @@ -277,7 +390,7 @@ static void flush_spans( struct setup_context *setup ) mask |= MASK_BOTTOM_LEFT; if (x+1 >= xleft1 && x+1 < xright1) mask |= MASK_BOTTOM_RIGHT; - emit_quad( setup, x, setup->span.y, mask ); + EMIT_QUAD( setup, x, setup->span.y, mask ); } break; @@ -790,6 +903,8 @@ void setup_tri( struct setup_context *setup, flush_spans( setup ); + WAIT_FOR_COMPLETION(setup); + #if DEBUG_FRAGS printf("Tri: %u frags emitted, %u written\n", setup->numFragsEmitted, @@ -931,7 +1046,7 @@ plot(struct setup_context *setup, int x, int y) /* flush prev quad, start new quad */ if (setup->quad.x0 != -1) - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); setup->quad.x0 = quadX; setup->quad.y0 = quadY; @@ -1053,8 +1168,10 @@ setup_line(struct setup_context *setup, /* draw final quad */ if (setup->quad.mask) { - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); } + + WAIT_FOR_COMPLETION(setup); } @@ -1163,7 +1280,7 @@ setup_point( struct setup_context *setup, setup->quad.x0 = (int) x - ix; setup->quad.y0 = (int) y - iy; setup->quad.mask = (1 << ix) << (2 * iy); - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); } else { if (round) { @@ -1224,7 +1341,7 @@ setup_point( struct setup_context *setup, if (setup->quad.mask) { setup->quad.x0 = ix; setup->quad.y0 = iy; - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); } } } @@ -1271,11 +1388,13 @@ setup_point( struct setup_context *setup, setup->quad.mask = mask; setup->quad.x0 = ix; setup->quad.y0 = iy; - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); } } } } + + WAIT_FOR_COMPLETION(setup); } void setup_prepare( struct setup_context *setup ) @@ -1300,7 +1419,9 @@ void setup_prepare( struct setup_context *setup ) /* Note: nr_attrs is only used for debugging (vertex printing) */ setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); - sp->quad.first->begin(sp->quad.first); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first->begin( sp->quad[i].first ); + } if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && @@ -1328,11 +1449,24 @@ void setup_destroy_context( struct setup_context *setup ) struct setup_context *setup_create_context( struct softpipe_context *softpipe ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); + uint i; setup->softpipe = softpipe; setup->quad.coef = setup->coef; setup->quad.posCoef = &setup->posCoef; +#if SP_NUM_QUAD_THREADS > 1 + setup->que.first = 0; + setup->que.last = 0; + pipe_mutex_init( setup->que.mutex ); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + setup->threads[i].setup = setup; + setup->threads[i].id = i; + setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] ); + } +#endif + return setup; } + -- cgit v1.2.3