diff options
author | Ben Skeggs <skeggsb@gmail.com> | 2008-09-12 20:33:59 +1000 |
---|---|---|
committer | Ben Skeggs <skeggsb@gmail.com> | 2008-09-12 20:33:59 +1000 |
commit | 81335d0f1760fe172a106f79e81281c1f0d7dedf (patch) | |
tree | f081924b0dad0fdfb1fa94580d63a88498469909 /src/gallium/drivers/cell/spu | |
parent | f302fca5eb63e4bca8af5b35c585451486143e6a (diff) | |
parent | aa66f08a21b791f338b519f0c2162cd8f7b3aeb0 (diff) |
Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r-- | src/gallium/drivers/cell/spu/Makefile | 7 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_colorpack.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.c | 225 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.h | 97 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 602 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 58 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_texture.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_tri.c | 134 |
8 files changed, 683 insertions, 448 deletions
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index d49abb2e82..1ae0dfb8c1 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -22,12 +22,15 @@ SOURCES = \ spu_render.c \ spu_texture.c \ spu_tile.c \ - spu_tri.c \ + spu_tri.c + +OLD_SOURCES = \ spu_exec.c \ spu_util.c \ spu_vertex_fetch.c \ spu_vertex_shader.c + SPU_OBJECTS = $(SOURCES:.c=.o) \ SPU_ASM_OUT = $(SOURCES:.c=.s) \ @@ -43,7 +46,7 @@ INCLUDE_DIRS = \ $(SPU_CC) $(SPU_CFLAGS) -c $< .c.s: - $(SPU_CC) $(SPU_CFLAGS) -S $< + $(SPU_CC) $(SPU_CFLAGS) -O3 -S $< # The .a file will be linked into the main/PPU executable diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h index e9fee8a3a6..fd8dc6ded3 100644 --- a/src/gallium/drivers/cell/spu/spu_colorpack.h +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -79,14 +79,14 @@ spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) static INLINE vector float -spu_unpack_color(uint color) +spu_unpack_B8G8R8A8(uint color) { vector unsigned int color_u4 = spu_splats(color); color_u4 = spu_shuffle(color_u4, color_u4, ((vector unsigned char) { - 0, 0, 0, 0, - 5, 5, 5, 5, 10, 10, 10, 10, + 5, 5, 5, 5, + 0, 0, 0, 0, 15, 15, 15, 15}) ); return spu_convtf(color_u4, 32); } diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index d223f32d94..78260c4259 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -34,6 +34,7 @@ #include "spu_main.h" #include "spu_render.h" +#include "spu_per_fragment_op.h" #include "spu_texture.h" #include "spu_tile.h" //#include "spu_test.h" @@ -46,7 +47,7 @@ /* helpful headers: /usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h -/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h +/opt/cell/sdk/usr/include/libmisc.h */ boolean Debug = FALSE; @@ -62,14 +63,6 @@ struct spu_vs_context draw; static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] ALIGN16_ATTRIB; -static unsigned char depth_stencil_code_buffer[4 * 64] - ALIGN16_ATTRIB; - -static unsigned char fb_blend_code_buffer[4 * 64] - ALIGN16_ATTRIB; - -static unsigned char logicop_code_buffer[4 * 64] - ALIGN16_ATTRIB; /** @@ -226,6 +219,46 @@ cmd_release_verts(const struct cell_command_release_verts *release) } +/** + * Process a CELL_CMD_STATE_FRAGMENT_OPS command. + * This involves installing new fragment ops SPU code. + * If this function is never called, we'll use a regular C fallback function + * for fragment processing. + */ +static void +cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) +{ + if (Debug) + printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + /* Copy state info (for fallback case only) */ + memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); + memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); + + /* Point function pointer at new code */ + spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + + spu.read_depth = spu.depth_stencil_alpha.depth.enabled; + spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; +} + + +static void +cmd_state_fragment_program(const struct cell_command_fragment_program *fp) +{ + if (Debug) + printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_program_code, fp->code, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); +#if 01 + /* Point function pointer at new code */ + spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; +#endif +} + + static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { @@ -252,102 +285,24 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) switch (spu.fb.depth_format) { case PIPE_FORMAT_Z32_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0xffffffffu; + break; case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: spu.fb.zsize = 4; + spu.fb.zscale = (float) 0x00ffffffu; break; case PIPE_FORMAT_Z16_UNORM: spu.fb.zsize = 2; + spu.fb.zscale = (float) 0xffffu; break; default: spu.fb.zsize = 0; break; } - - if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) - spu.color_shuffle = ((vector unsigned char) { - 12, 0, 4, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}); - else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) - spu.color_shuffle = ((vector unsigned char) { - 8, 4, 0, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}); - else - ASSERT(0); -} - - -static void -cmd_state_blend(const struct cell_command_blend *state) -{ - if (Debug) - printf("SPU %u: BLEND: enabled %d\n", - spu.init.id, - (state->size != 0)); - - ASSERT_ALIGN16(state->base); - - if (state->size != 0) { - mfc_get(fb_blend_code_buffer, - (unsigned int) state->base, /* src */ - ROUNDUP16(state->size), - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - spu.blend = (blend_func) fb_blend_code_buffer; - spu.read_fb = state->read_fb; - } else { - spu.read_fb = FALSE; - } -} - - -static void -cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state) -{ - if (Debug) - printf("SPU %u: DEPTH_STENCIL: ztest %d\n", - spu.init.id, - state->read_depth); - - ASSERT_ALIGN16(state->base); - - if (state->size != 0) { - mfc_get(depth_stencil_code_buffer, - (unsigned int) state->base, /* src */ - ROUNDUP16(state->size), - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - } else { - /* If there is no code, emit a return instruction. - */ - depth_stencil_code_buffer[0] = 0x35; - depth_stencil_code_buffer[1] = 0x00; - depth_stencil_code_buffer[2] = 0x00; - depth_stencil_code_buffer[3] = 0x00; - } - - spu.frag_test = (frag_test_func) depth_stencil_code_buffer; - spu.read_depth = state->read_depth; - spu.read_stencil = state->read_stencil; -} - - -static void -cmd_state_logicop(const struct cell_command_logicop * code) -{ - mfc_get(logicop_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - spu.logicop = (logicop_func) logicop_code_buffer; } @@ -450,7 +405,9 @@ cmd_finish(void) /** - * Execute a batch of commands + * Execute a batch of commands which was sent to us by the PPU. + * See the cell_emit_state.c code to see where the commands come from. + * * The opcode param encodes the location of the buffer and its size. */ static void @@ -487,16 +444,14 @@ cmd_batch(uint opcode) printf("SPU %u: release batch buf %u\n", spu.init.id, buf); release_buffer(buf); + /* + * Loop over commands in the batch buffer + */ for (pos = 0; pos < usize; /* no incr */) { switch (buffer[pos]) { - case CELL_CMD_STATE_FRAMEBUFFER: - { - struct cell_command_framebuffer *fb - = (struct cell_command_framebuffer *) &buffer[pos]; - cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 8; - } - break; + /* + * rendering commands + */ case CELL_CMD_CLEAR_SURFACE: { struct cell_command_clear_surface *clr @@ -514,26 +469,32 @@ cmd_batch(uint opcode) pos += pos_incr; } break; - case CELL_CMD_RELEASE_VERTS: + /* + * state-update commands + */ + case CELL_CMD_STATE_FRAMEBUFFER: { - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) &buffer[pos]; - cmd_release_verts(release); - pos += sizeof(*release) / 8; + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 8; } break; - case CELL_CMD_FINISH: - cmd_finish(); - pos += 1; - break; - case CELL_CMD_STATE_BLEND: - cmd_state_blend((struct cell_command_blend *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_blend)) / 8); + case CELL_CMD_STATE_FRAGMENT_OPS: + { + struct cell_command_fragment_ops *fops + = (struct cell_command_fragment_ops *) &buffer[pos]; + cmd_state_fragment_ops(fops); + pos += sizeof(*fops) / 8; + } break; - case CELL_CMD_STATE_DEPTH_STENCIL: - cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *) - &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8); + case CELL_CMD_STATE_FRAGMENT_PROGRAM: + { + struct cell_command_fragment_program *fp + = (struct cell_command_fragment_program *) &buffer[pos]; + cmd_state_fragment_program(fp); + pos += sizeof(*fp) / 8; + } break; case CELL_CMD_STATE_SAMPLER: { @@ -569,8 +530,10 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; case CELL_CMD_STATE_BIND_VS: +#if 0 spu_bind_vertex_shader(&draw, (struct cell_shader_info *) &buffer[pos+1]); +#endif pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); break; case CELL_CMD_STATE_ATTRIB_FETCH: @@ -578,9 +541,20 @@ cmd_batch(uint opcode) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); break; - case CELL_CMD_STATE_LOGICOP: - cmd_state_logicop((struct cell_command_logicop *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8); + /* + * misc commands + */ + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; + } break; case CELL_CMD_FLUSH_BUFFER_RANGE: { struct cell_buffer_range *br = (struct cell_buffer_range *) @@ -650,7 +624,9 @@ main_loop(void) exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: +#if 0 spu_execute_vertex_shader(&draw, &cmd.vs); +#endif break; case CELL_CMD_BATCH: cmd_batch(opcode); @@ -675,6 +651,11 @@ one_time_init(void) memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); invalidate_tex_cache(); + + /* Install default/fallback fragment processing function. + * This will normally be overriden by a code-gen'd function. + */ + spu.fragment_ops = spu_fallback_fragment_ops; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 4879f8c9c8..2c7b625840 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -41,6 +41,10 @@ #define MAX_HEIGHT 1024 +/** + * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. + * The data may be addressed through several different types. + */ typedef union { ushort us[TILE_SIZE][TILE_SIZE]; uint ui[TILE_SIZE][TILE_SIZE]; @@ -56,38 +60,29 @@ typedef union { #define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ -struct spu_frag_test_results { - qword mask; - qword depth; - qword stencil; -}; - -typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask, - qword pixel_depth, qword pixel_stencil, qword frag_depth, - qword frag_alpha, qword facing); - - -struct spu_blend_results { - qword r; - qword g; - qword b; - qword a; -}; +/** Function for sampling textures */ +typedef vector float (*spu_sample_texture_func)(uint unit, + vector float texcoord); -typedef struct spu_blend_results (*blend_func)( - qword frag_r, qword frag_g, qword frag_b, qword frag_a, - qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, - qword const_r, qword const_g, qword const_b, qword const_a); +/** Function for performing per-fragment ops */ +typedef void (*spu_fragment_ops_func)(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask); -typedef struct spu_blend_results (*logicop_func)( - qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, - qword frag_r, qword frag_g, qword frag_b, qword frag_a, - qword frag_mask); +/** Function for running fragment program */ +typedef void (*spu_fragment_program_func)(vector float *inputs, + vector float *outputs, + vector float *constants); -typedef vector float (*sample_texture_func)(uint unit, vector float texcoord); - -struct spu_framebuffer { +struct spu_framebuffer +{ void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ enum pipe_format color_format; @@ -99,6 +94,7 @@ struct spu_framebuffer { uint depth_clear_value; uint zsize; /**< 0, 2 or 4 bytes per Z */ + float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ } ALIGN16_ATTRIB; @@ -115,35 +111,31 @@ struct spu_texture /** - * All SPU global/context state will be in singleton object of this type: + * All SPU global/context state will be in a singleton object of this type: */ struct spu_global { + /** One-time init/constant info */ struct cell_init_info init; + /* + * Current state + */ struct spu_framebuffer fb; - boolean read_depth; - boolean read_stencil; - frag_test_func frag_test; /**< Current depth/stencil test code */ - - boolean read_fb; /**< Does current blend mode require framebuffer read? */ - blend_func blend; /**< Current blend code */ - qword const_blend_color[4] ALIGN16_ATTRIB; - - logicop_func logicop; /**< Current logicop code **/ - + struct pipe_depth_stencil_alpha_state depth_stencil_alpha; + struct pipe_blend_state blend; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct spu_texture texture[PIPE_MAX_SAMPLERS]; - struct vertex_info vertex_info; - /* XXX more state to come */ - - - /** current color and Z tiles */ + /** Current color and Z tiles */ tile_t ctile ALIGN16_ATTRIB; tile_t ztile ALIGN16_ATTRIB; + /** Read depth/stencil tiles? */ + boolean read_depth; + boolean read_stencil; + /** Current tiles' status */ ubyte cur_ctile_status, cur_ztile_status; @@ -151,11 +143,22 @@ struct spu_global ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + /** Current fragment ops machine code */ + uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS]; + /** Current fragment ops function */ + spu_fragment_ops_func fragment_ops; + + /** Current fragment program machine code */ + uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; + /** Current fragment ops function */ + spu_fragment_program_func fragment_program; - /** for converting RGBA to PIPE_FORMAT_x colors */ - vector unsigned char color_shuffle; + /** Current texture sampler function */ + spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; - sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; + /** Fragment program constants (XXX preliminary/used) */ +#define MAX_CONSTANTS 32 + vector float constants[MAX_CONSTANTS]; } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index c0a729b3d2..03dd547845 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -1,211 +1,475 @@ -/* - * (C) Copyright IBM Corporation 2008 +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ /** - * \file spu_per_fragment_op.c - * SPU implementation various per-fragment operations. - * - * \author Ian Romanick <idr@us.ibm.com> + * \author Brian Paul */ + +#include <transpose_matrix4x4.h> #include "pipe/p_format.h" #include "spu_main.h" +#include "spu_colorpack.h" #include "spu_per_fragment_op.h" -#define ZERO 0x80 -static void -read_ds_quad(tile_t *buffer, unsigned x, unsigned y, - enum pipe_format depth_format, qword *depth, - qword *stencil) -{ - const int ix = x / 2; - const int iy = y / 2; +#define LINEAR_QUAD_LAYOUT 1 - switch (depth_format) { - case PIPE_FORMAT_Z16_UNORM: { - qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; - const qword shuf_vec = (qword) { - ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, - ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7 - }; +/** + * Called by rasterizer for each quad after the shader has run. Do + * all the per-fragment operations including alpha test, z test, + * stencil test, blend, colormask and logicops. This is a + * fallback/debug function. In reality we'll use a generated function + * produced by the PPU. But this function is useful for + * debug/validation. + */ +void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragR, + vector float fragG, + vector float fragB, + vector float fragA, + vector unsigned int mask) +{ + vector float frag_aos[4]; + unsigned int c0, c1, c2, c3; + + /* do alpha test */ + if (spu.depth_stencil_alpha.alpha.enabled) { + vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); + vector unsigned int amask; + + switch (spu.depth_stencil_alpha.alpha.func) { + case PIPE_FUNC_LESS: + amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */ + break; + case PIPE_FUNC_GREATER: + amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */ + break; + case PIPE_FUNC_GEQUAL: + amask = spu_cmpgt(ref, fragA); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_LEQUAL: + amask = spu_cmpgt(fragA, ref); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_EQUAL: + amask = spu_cmpeq(ref, fragA); + break; + case PIPE_FUNC_NOTEQUAL: + amask = spu_cmpeq(ref, fragA); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_ALWAYS: + amask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + amask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, amask); + } + /* Z and/or stencil testing... */ + if (spu.depth_stencil_alpha.depth.enabled || + spu.depth_stencil_alpha.stencil[0].enabled) { + + /* get four Z/Stencil values from tile */ + vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); + vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; + vector unsigned int ifbZ = spu_and(ifbZS, mask24); + vector unsigned int ifbS = spu_andc(ifbZS, mask24); + + if (spu.depth_stencil_alpha.stencil[0].enabled) { + /* do stencil test */ + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM); + + } + else if (spu.depth_stencil_alpha.depth.enabled) { + /* do depth test */ + + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM || + spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM); + + vector unsigned int ifragZ; + vector unsigned int zmask; + + /* convert four fragZ from float to uint */ + fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); + ifragZ = spu_convtu(fragZ, 0); + + /* do depth comparison, setting zmask with results */ + switch (spu.depth_stencil_alpha.depth.func) { + case PIPE_FUNC_LESS: + zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */ + break; + case PIPE_FUNC_GREATER: + zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */ + break; + case PIPE_FUNC_GEQUAL: + zmask = spu_cmpgt(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_LEQUAL: + zmask = spu_cmpgt(ifragZ, ifbZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_EQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + break; + case PIPE_FUNC_NOTEQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_ALWAYS: + zmask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + zmask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, zmask); + + /* merge framebuffer Z and fragment Z according to the mask */ + ifbZ = spu_or(spu_and(ifragZ, mask), + spu_andc(ifbZ, mask)); + } + + if (spu_extract(spu_orx(mask), 0)) { + /* put new fragment Z/Stencil values back into Z/Stencil tile */ + depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); + + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } + } - /* At even X values we want the first 4 shorts, and at odd X values we - * want the second 4 shorts. + if (spu.blend.blend_enable) { + /* blending terms, misc regs */ + vector float term1r, term1g, term1b, term1a; + vector float term2r, term2g, term2b, term2a; + vector float one, tmp; + + vector float fbRGBA[4]; /* current framebuffer colors */ + + /* get colors from framebuffer/tile */ + { + vector float fc[4]; + uint c0, c1, c2, c3; + +#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ + c0 = colorTile->ui[y][x*2+0]; + c1 = colorTile->ui[y][x*2+1]; + c2 = colorTile->ui[y][x*2+2]; + c3 = colorTile->ui[y][x*2+3]; +#else + c0 = colorTile->ui[y+0][x+0]; + c1 = colorTile->ui[y+0][x+1]; + c2 = colorTile->ui[y+1][x+0]; + c3 = colorTile->ui[y+1][x+1]; +#endif + switch (spu.fb.color_format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + fc[0] = spu_unpack_B8G8R8A8(c0); + fc[1] = spu_unpack_B8G8R8A8(c1); + fc[2] = spu_unpack_B8G8R8A8(c2); + fc[3] = spu_unpack_B8G8R8A8(c3); + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + fc[0] = spu_unpack_A8R8G8B8(c0); + fc[1] = spu_unpack_A8R8G8B8(c1); + fc[2] = spu_unpack_A8R8G8B8(c2); + fc[3] = spu_unpack_A8R8G8B8(c3); + break; + default: + ASSERT(0); + } + _transpose_matrix4x4(fbRGBA, fc); + } + + /* + * Compute Src RGB terms */ - qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3)); - qword bias_mask = si_fsmbi(0x3333); - qword sv = si_a(shuf_vec, si_and(bias_mask, bias)); - - *depth = si_shufb(*ptr, *ptr, sv); - *stencil = si_il(0); - break; + switch (spu.blend.rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1r = fragR; + term1g = fragG; + term1b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term1r = + term1g = + term1b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1r = spu_mul(fragR, fragR); + term1g = spu_mul(fragG, fragG); + term1b = spu_mul(fragB, fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1r = spu_mul(fragR, fragA); + term1g = spu_mul(fragG, fragA); + term1b = spu_mul(fragB, fragA); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term + */ + switch (spu.blend.alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1a = spu_mul(fragA, fragA); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms + */ + switch (spu.blend.rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2r = fragR; + term2g = fragG; + term2b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term2r = + term2g = + term2b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2r = spu_mul(fbRGBA[0], fragR); + term2g = spu_mul(fbRGBA[1], fragG); + term2b = spu_mul(fbRGBA[2], fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2r = spu_mul(fbRGBA[0], fragA); + term2g = spu_mul(fbRGBA[1], fragA); + term2b = spu_mul(fbRGBA[2], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2r = spu_mul(fbRGBA[0], tmp); + term2g = spu_mul(fbRGBA[1], tmp); + term2b = spu_mul(fbRGBA[2], tmp); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term + */ + switch (spu.blend.alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2a = spu_mul(fbRGBA[3], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2a = spu_mul(fbRGBA[3], tmp); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (spu.blend.rgb_func) { + case PIPE_BLEND_ADD: + fragR = spu_add(term1r, term2r); + fragG = spu_add(term1g, term2g); + fragB = spu_add(term1b, term2b); + break; + case PIPE_BLEND_SUBTRACT: + fragR = spu_sub(term1r, term2r); + fragG = spu_sub(term1g, term2g); + fragB = spu_sub(term1b, term2b); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (spu.blend.alpha_func) { + case PIPE_BLEND_ADD: + fragA = spu_add(term1a, term2a); + break; + case PIPE_BLEND_SUBTRACT: + fragA = spu_sub(term1a, term2a); + break; + /* XXX more cases */ + default: + ASSERT(0); + } } - case PIPE_FORMAT_Z32_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - - *depth = *ptr; - *stencil = si_il(0); - break; + /* + * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA. + */ +#if 0 + /* original code */ + { + vector float frag_soa[4]; + frag_soa[0] = fragR; + frag_soa[1] = fragG; + frag_soa[2] = fragB; + frag_soa[3] = fragA; + _transpose_matrix4x4(frag_aos, frag_soa); } - - - case PIPE_FORMAT_Z24S8_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - qword mask = si_fsmbi(0xEEEE); - - *depth = si_rotmai(si_and(*ptr, mask), -8); - *stencil = si_andc(*ptr, mask); +#else + /* short-cut relying on function parameter layout: */ + _transpose_matrix4x4(frag_aos, &fragR); + (void) fragG; + (void) fragB; +#endif + + /* + * Pack float colors into 32-bit RGBA words. + */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + c0 = spu_pack_A8R8G8B8(frag_aos[0]); + c1 = spu_pack_A8R8G8B8(frag_aos[1]); + c2 = spu_pack_A8R8G8B8(frag_aos[2]); + c3 = spu_pack_A8R8G8B8(frag_aos[3]); break; - } - - - case PIPE_FORMAT_S8Z24_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - *depth = si_and(*ptr, si_fsmbi(0x7777)); - *stencil = si_andi(si_roti(*ptr, 8), 0x0ff); + case PIPE_FORMAT_B8G8R8A8_UNORM: + c0 = spu_pack_B8G8R8A8(frag_aos[0]); + c1 = spu_pack_B8G8R8A8(frag_aos[1]); + c2 = spu_pack_B8G8R8A8(frag_aos[2]); + c3 = spu_pack_B8G8R8A8(frag_aos[3]); break; - } - - default: + fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); ASSERT(0); - break; - } -} - - -static void -write_ds_quad(tile_t *buffer, unsigned x, unsigned y, - enum pipe_format depth_format, - qword depth, qword stencil) -{ - const int ix = x / 2; - const int iy = y / 2; - - (void) stencil; - - switch (depth_format) { - case PIPE_FORMAT_Z16_UNORM: { - qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; - - qword sv = ((ix & 0x01) == 0) - ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31 } - : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23, - 2, 3, 6, 7, 10, 11, 14, 15 }; - *ptr = si_shufb(depth, *ptr, sv); - break; - } - - - case PIPE_FORMAT_Z32_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - *ptr = depth; - break; } - case PIPE_FORMAT_Z24S8_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - qword mask = si_fsmbi(0xEEEE); - - depth = si_shli(depth, 8); - *ptr = si_selb(stencil, depth, mask); - break; - } - - - case PIPE_FORMAT_S8Z24_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - qword mask = si_fsmbi(0x7777); - - stencil = si_shli(stencil, 24); - *ptr = si_selb(stencil, depth, mask); - break; + /* + * Color masking + */ + if (spu.blend.colormask != 0xf) { + /* XXX to do */ + /* apply color mask to 32-bit packed colors */ } - default: - ASSERT(0); - break; + /* + * Logic Ops + */ + if (spu.blend.logicop_enable) { + /* XXX to do */ + /* apply logicop to 32-bit packed colors */ } -} -qword -spu_do_depth_stencil(int x, int y, - qword frag_mask, qword frag_depth, qword frag_alpha, - qword facing) -{ - struct spu_frag_test_results result; - qword pixel_depth; - qword pixel_stencil; - - /* All of this preable code (everthing before the call to frag_test) should - * be generated on the PPU and upload to the SPU. + /* + * If mask is non-zero, mark tile as dirty. */ - if (spu.read_depth || spu.read_stencil) { - read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, - &pixel_depth, &pixel_stencil); + if (spu_extract(spu_orx(mask), 0)) { + spu.cur_ctile_status = TILE_STATUS_DIRTY; } - - switch (spu.fb.depth_format) { - case PIPE_FORMAT_Z16_UNORM: - frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu))); - frag_depth = si_cfltu(frag_depth, 0); - break; - case PIPE_FORMAT_Z32_UNORM: - frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu))); - frag_depth = si_cfltu(frag_depth, 0); - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu))); - frag_depth = si_cfltu(frag_depth, 0); - break; - default: - ASSERT(0); - break; + else { + return; } - result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil, - frag_depth, frag_alpha, facing); - - /* This code (everthing after the call to frag_test) should - * be generated on the PPU and upload to the SPU. + /* + * Write new quad colors to the framebuffer/tile. + * Only write pixels where the corresponding mask word is set. */ - if (spu.read_depth || spu.read_stencil) { - write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, - result.depth, result.stencil); - } - - return result.mask; +#if LINEAR_QUAD_LAYOUT + /* + * Quad layout: + * +--+--+--+--+ + * |p0|p1|p2|p3| + * +--+--+--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y][x*2] = c0; + if (spu_extract(mask, 1)) + colorTile->ui[y][x*2+1] = c1; + if (spu_extract(mask, 2)) + colorTile->ui[y][x*2+2] = c2; + if (spu_extract(mask, 3)) + colorTile->ui[y][x*2+3] = c3; +#else + /* + * Quad layout: + * +--+--+ + * |p0|p1| + * +--+--+ + * |p2|p3| + * +--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y+0][x+0] = c0; + if (spu_extract(mask, 1)) + colorTile->ui[y+0][x+1] = c1; + if (spu_extract(mask, 2)) + colorTile->ui[y+1][x+0] = c2; + if (spu_extract(mask, 3)) + colorTile->ui[y+1][x+1] = c3; +#endif } diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h index 6571258699..f817abf046 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -1,32 +1,44 @@ -/* - * (C) Copyright IBM Corporation 2008 +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #ifndef SPU_PER_FRAGMENT_OP #define SPU_PER_FRAGMENT_OP -extern qword -spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth, - qword frag_alpha, qword facing); + +extern void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask); + #endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 5051774f00..117b8a36f8 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -97,7 +97,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) const qword offset_y = si_andi((qword) y, 0x1f); const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); - const qword tile_size = (qword) spu_splats(sizeof(tile_t)); + const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); tile_offset = si_mpy((qword) tile_offset, tile_size); diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 2a4e0b423c..8b93878192 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -38,7 +38,6 @@ #include "spu_texture.h" #include "spu_tile.h" #include "spu_tri.h" -#include "spu_per_fragment_op.h" /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ @@ -209,7 +208,7 @@ clip_emit_quad(struct setup_stage *setup) /** * Evaluate attribute coefficients (plane equations) to compute * attribute values for the four fragments in a quad. - * Eg: four colors will be compute. + * Eg: four colors will be computed (in AoS format). */ static INLINE void eval_coeff(uint slot, float x, float y, vector float result[4]) @@ -255,31 +254,6 @@ eval_z(float x, float y) } -static INLINE mask_t -do_depth_test(int x, int y, mask_t quadmask) -{ - float4 zvals; - mask_t mask; - - if (spu.fb.depth_format == PIPE_FORMAT_NONE) - return quadmask; - - zvals.v = eval_z((float) x, (float) y); - - mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx, - y - setup.cliprect_miny, - (qword) quadmask, - (qword) zvals.v, - (qword) spu_splats((unsigned char) 0x0ffu), - (qword) spu_splats((unsigned int) 0x01u)); - - if (spu_extract(spu_orx(mask), 0)) - spu.cur_ztile_status = TILE_STATUS_DIRTY; - - return mask; -} - - /** * Emit a quad (pass to next stage). No clipping is done. * Note: about 1/5 to 1/7 of the time, mask is zero and this function @@ -289,18 +263,6 @@ do_depth_test(int x, int y, mask_t quadmask) static INLINE void emit_quad( int x, int y, mask_t mask ) { -#if 0 - struct softpipe_context *sp = setup.softpipe; - setup.quad.x0 = x; - setup.quad.y0 = y; - setup.quad.mask = mask; - sp->quad.first->run(sp->quad.first, &setup.quad); -#else - - if (spu.read_depth) { - mask = do_depth_test(x, y, mask); - } - /* If any bits in mask are set... */ if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; @@ -308,6 +270,7 @@ emit_quad( int x, int y, mask_t mask ) vector float colors[4]; spu.cur_ctile_status = TILE_STATUS_DIRTY; + spu.cur_ztile_status = TILE_STATUS_DIRTY; if (spu.texture[0].start) { /* texture mapping */ @@ -351,59 +314,68 @@ emit_quad( int x, int y, mask_t mask ) } else { /* simple shading */ +#if 0 eval_coeff(1, (float) x, (float) y, colors); - } - - /* Convert fragment data from AoS to SoA format. - */ - qword soa_frag[4]; - _transpose_matrix4x4((vec_float4 *) soa_frag, colors); +#else + /* XXX new fragment program code */ + + if (spu.fragment_program) { + vector float inputs[4*4], outputs[2*4]; + + /* setup inputs */ + eval_coeff(1, (float) x, (float) y, inputs); + + /* Execute the current fragment program */ + spu.fragment_program(inputs, outputs, spu.constants); + + /* Copy outputs */ + colors[0] = outputs[0*4+0]; + colors[1] = outputs[0*4+1]; + colors[2] = outputs[0*4+2]; + colors[3] = outputs[0*4+3]; + + if (0 && spu.init.id==0 && y == 48) { + printf("colors[0] = %f %f %f %f\n", + spu_extract(colors[0], 0), + spu_extract(colors[0], 1), + spu_extract(colors[0], 2), + spu_extract(colors[0], 3)); + printf("colors[1] = %f %f %f %f\n", + spu_extract(colors[1], 0), + spu_extract(colors[1], 1), + spu_extract(colors[1], 2), + spu_extract(colors[1], 3)); + } - /* Read the current framebuffer values. - */ - const qword pix[4] = { - (qword) spu_splats(spu.ctile.ui[iy+0][ix+0]), - (qword) spu_splats(spu.ctile.ui[iy+0][ix+1]), - (qword) spu_splats(spu.ctile.ui[iy+1][ix+0]), - (qword) spu_splats(spu.ctile.ui[iy+1][ix+1]), - }; + } +#endif + } - qword soa_pix[4]; - if (spu.read_fb) { - /* Convert pixel data from AoS to SoA format. + { + /* Convert fragment data from AoS to SoA format. + * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) + * This is temporary! */ - vec_float4 aos_pix[4] = { - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]), - }; - - _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix); - } + vector float soa_frag[4]; + _transpose_matrix4x4(soa_frag, colors); + float4 fragZ; - struct spu_blend_results result = - (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], - soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3], - spu.const_blend_color[0], spu.const_blend_color[1], - spu.const_blend_color[2], spu.const_blend_color[3]); + fragZ.v = eval_z((float) x, (float) y); + /* Do all per-fragment/quad operations here, including: + * alpha test, z test, stencil test, blend and framebuffer writing. + */ + spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, + fragZ.v, + soa_frag[0], soa_frag[1], + soa_frag[2], soa_frag[3], + mask); + } - /* Convert final pixel data from SoA to AoS format. - */ - result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3], - result.r, result.g, result.b, result.a, - (qword) mask); - - spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0); - spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0); - spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0); - spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0); } -#endif } |