diff options
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r-- | src/gallium/drivers/cell/spu/.gitignore | 1 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.c | 127 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 164 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_tri.c | 114 |
5 files changed, 251 insertions, 163 deletions
diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore new file mode 100644 index 0000000000..2be9a2d324 --- /dev/null +++ b/src/gallium/drivers/cell/spu/.gitignore @@ -0,0 +1 @@ +g3d_spu diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 78260c4259..b4d30228f7 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -50,7 +50,31 @@ helpful headers: /opt/cell/sdk/usr/include/libmisc.h */ +/* Set to 0 to disable all extraneous debugging code */ +#define DEBUG 1 + +#if DEBUG boolean Debug = FALSE; +boolean force_fragment_ops_fallback = TRUE; + +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define DEBUG_PRINTF(format,...) \ + if (Debug) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#define D_PRINTF(flag, format,...) \ + if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) + +#else + +#define DEBUG_PRINTF(...) +#define D_PRINTF(...) + +#endif struct spu_global spu; @@ -133,9 +157,7 @@ really_clear_tiles(uint surfaceIndex) static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { - if (Debug) - printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, - clear->surface, clear->value); + DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); if (clear->surface == 0) { spu.fb.color_clear_value = clear->value; @@ -203,17 +225,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) #endif /* CLEAR_OPT */ - if (Debug) - printf("SPU %u: CLEAR SURF done\n", spu.init.id); + DEBUG_PRINTF("CLEAR SURF done\n"); } static void cmd_release_verts(const struct cell_command_release_verts *release) { - if (Debug) - printf("SPU %u: RELEASE VERTS %u\n", - spu.init.id, release->vertex_buf); + DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); ASSERT(release->vertex_buf != ~0U); release_buffer(release->vertex_buf); } @@ -228,16 +247,38 @@ cmd_release_verts(const struct cell_command_release_verts *release) static void cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) { - if (Debug) - printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); + static int warned = 0; + + DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); /* Copy state info (for fallback case only) */ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); - /* Point function pointer at new code */ - spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + /* Parity twist! For now, always use the fallback code by default, + * only switching to codegen when specifically requested. This + * allows us to develop freely without risking taking down the + * branch. + * + * Later, the parity of this check will be reversed, so that + * codegen is *always* used, unless we specifically indicate that + * we don't want it. + * + * Eventually, the option will be removed completely, because in + * final code we'll always use codegen and won't even provide the + * raw state records that the fallback code requires. + */ + if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { + spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + } + else { + /* otherwise, the default fallback code remains in place */ + if (!warned) { + fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); + warned = 1; + } + } spu.read_depth = spu.depth_stencil_alpha.depth.enabled; spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; @@ -247,8 +288,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) static void cmd_state_fragment_program(const struct cell_command_fragment_program *fp) { - if (Debug) - printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id); + DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_program_code, fp->code, SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); @@ -262,9 +302,7 @@ cmd_state_fragment_program(const struct cell_command_fragment_program *fp) static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { - if (Debug) - printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", - spu.init.id, + DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", cmd->width, cmd->height, cmd->color_start, @@ -309,9 +347,7 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) static void cmd_state_sampler(const struct cell_command_sampler *sampler) { - if (Debug) - printf("SPU %u: SAMPLER [%u]\n", - spu.init.id, sampler->unit); + DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); spu.sampler[sampler->unit] = sampler->state; if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) @@ -328,11 +364,9 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint width = texture->width; const uint height = texture->height; - if (Debug) { - printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id, + DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n", texture->unit, texture->start, texture->width, texture->height); - } spu.texture[unit].start = texture->start; spu.texture[unit].width = width; @@ -351,10 +385,7 @@ cmd_state_texture(const struct cell_command_texture *texture) static void cmd_state_vertex_info(const struct vertex_info *vinfo) { - if (Debug) { - printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, - vinfo->num_attribs); - } + DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); ASSERT(vinfo->num_attribs >= 1); ASSERT(vinfo->num_attribs <= 8); memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); @@ -393,8 +424,7 @@ cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) static void cmd_finish(void) { - if (Debug) - printf("SPU %u: FINISH\n", spu.init.id); + DEBUG_PRINTF("FINISH\n"); really_clear_tiles(0); /* wait for all outstanding DMAs to finish */ mfc_write_tag_mask(~0); @@ -419,9 +449,8 @@ cmd_batch(uint opcode) const unsigned usize = size / sizeof(buffer[0]); uint pos; - if (Debug) - printf("SPU %u: BATCH buffer %u, len %u, from %p\n", - spu.init.id, buf, size, spu.init.buffers[buf]); + DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", + buf, size, spu.init.buffers[buf]); ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); @@ -440,8 +469,7 @@ cmd_batch(uint opcode) wait_on_mask(1 << TAG_BATCH_BUFFER); /* Tell PPU we're done copying the buffer to local store */ - if (Debug) - printf("SPU %u: release batch buf %u\n", spu.init.id, buf); + DEBUG_PRINTF("release batch buf %u\n", buf); release_buffer(buf); /* @@ -571,8 +599,7 @@ cmd_batch(uint opcode) } } - if (Debug) - printf("SPU %u: BATCH complete\n", spu.init.id); + DEBUG_PRINTF("BATCH complete\n"); } @@ -585,8 +612,7 @@ main_loop(void) struct cell_command cmd; int exitFlag = 0; - if (Debug) - printf("SPU %u: Enter main loop\n", spu.init.id); + DEBUG_PRINTF("Enter main loop\n"); ASSERT((sizeof(struct cell_command) & 0xf) == 0); ASSERT_ALIGN16(&cmd); @@ -595,14 +621,12 @@ main_loop(void) unsigned opcode; int tag = 0; - if (Debug) - printf("SPU %u: Wait for cmd...\n", spu.init.id); + DEBUG_PRINTF("Wait for cmd...\n"); /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); - if (Debug) - printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode); + DEBUG_PRINTF("got cmd 0x%x\n", opcode); /* command payload */ mfc_get(&cmd, /* dest */ @@ -619,8 +643,7 @@ main_loop(void) switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: - if (Debug) - printf("SPU %u: EXIT\n", spu.init.id); + DEBUG_PRINTF("EXIT\n"); exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: @@ -632,13 +655,12 @@ main_loop(void) cmd_batch(opcode); break; default: - printf("Bad opcode!\n"); + printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); } } - if (Debug) - printf("SPU %u: Exit main loop\n", spu.init.id); + DEBUG_PRINTF("Exit main loop\n"); spu_dcache_report(); } @@ -653,7 +675,8 @@ one_time_init(void) invalidate_tex_cache(); /* Install default/fallback fragment processing function. - * This will normally be overriden by a code-gen'd function. + * This will normally be overriden by a code-gen'd function + * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. */ spu.fragment_ops = spu_fallback_fragment_ops; } @@ -682,11 +705,13 @@ main(main_param_t speid, main_param_t argp) ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); ASSERT(sizeof(struct cell_command_render) % 8 == 0); + ASSERT(((unsigned long) &spu.fragment_ops_code) % 32 == 0); + ASSERT(((unsigned long) &spu.fragment_program_code) % 32 == 0); one_time_init(); - if (Debug) - printf("SPU: main() speid=%lu\n", (unsigned long) speid); + DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); mfc_get(&spu.init, /* dest */ (unsigned int) argp, /* src */ @@ -698,7 +723,7 @@ main(main_param_t speid, main_param_t argp) #if 0 if (spu.init.id==0) - spu_test_misc(); + spu_test_misc(spu.init.id); #endif main_loop(); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 2c7b625840..72e540fcff 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -143,13 +143,13 @@ struct spu_global ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - /** Current fragment ops machine code */ - uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS]; + /** Current fragment ops machine code, at 32-byte boundary */ + uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN32_ATTRIB; /** Current fragment ops function */ spu_fragment_ops_func fragment_ops; - /** Current fragment program machine code */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; + /** Current fragment program machine code, at 32-byte boundary */ + uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN32_ATTRIB; /** Current fragment ops function */ spu_fragment_program_func fragment_program; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 03dd547845..f107764fb2 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -60,9 +60,12 @@ spu_fallback_fragment_ops(uint x, uint y, vector unsigned int mask) { vector float frag_aos[4]; - unsigned int c0, c1, c2, c3; + unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ + unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */ - /* do alpha test */ + /* + * Do alpha test + */ if (spu.depth_stencil_alpha.alpha.enabled) { vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); vector unsigned int amask; @@ -102,7 +105,10 @@ spu_fallback_fragment_ops(uint x, uint y, mask = spu_and(mask, amask); } - /* Z and/or stencil testing... */ + + /* + * Z and/or stencil testing... + */ if (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled) { @@ -178,6 +184,32 @@ spu_fallback_fragment_ops(uint x, uint y, } } + + /* + * If we'll need the current framebuffer/tile colors for blending + * or logicop or colormask, fetch them now. + */ + if (spu.blend.blend_enable || + spu.blend.logicop_enable || + spu.blend.colormask != 0xf) { + +#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ + fbc0 = colorTile->ui[y][x*2+0]; + fbc1 = colorTile->ui[y][x*2+1]; + fbc2 = colorTile->ui[y][x*2+2]; + fbc3 = colorTile->ui[y][x*2+3]; +#else + fbc0 = colorTile->ui[y+0][x+0]; + fbc1 = colorTile->ui[y+0][x+1]; + fbc2 = colorTile->ui[y+1][x+0]; + fbc3 = colorTile->ui[y+1][x+1]; +#endif + } + + + /* + * Do blending + */ if (spu.blend.blend_enable) { /* blending terms, misc regs */ vector float term1r, term1g, term1b, term1a; @@ -186,39 +218,26 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fbRGBA[4]; /* current framebuffer colors */ - /* get colors from framebuffer/tile */ + /* convert framebuffer colors from packed int to vector float */ { - vector float fc[4]; - uint c0, c1, c2, c3; - -#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ - c0 = colorTile->ui[y][x*2+0]; - c1 = colorTile->ui[y][x*2+1]; - c2 = colorTile->ui[y][x*2+2]; - c3 = colorTile->ui[y][x*2+3]; -#else - c0 = colorTile->ui[y+0][x+0]; - c1 = colorTile->ui[y+0][x+1]; - c2 = colorTile->ui[y+1][x+0]; - c3 = colorTile->ui[y+1][x+1]; -#endif + vector float temp[4]; /* float colors in AOS form */ switch (spu.fb.color_format) { case PIPE_FORMAT_B8G8R8A8_UNORM: - fc[0] = spu_unpack_B8G8R8A8(c0); - fc[1] = spu_unpack_B8G8R8A8(c1); - fc[2] = spu_unpack_B8G8R8A8(c2); - fc[3] = spu_unpack_B8G8R8A8(c3); + temp[0] = spu_unpack_B8G8R8A8(fbc0); + temp[1] = spu_unpack_B8G8R8A8(fbc1); + temp[2] = spu_unpack_B8G8R8A8(fbc2); + temp[3] = spu_unpack_B8G8R8A8(fbc3); break; case PIPE_FORMAT_A8R8G8B8_UNORM: - fc[0] = spu_unpack_A8R8G8B8(c0); - fc[1] = spu_unpack_A8R8G8B8(c1); - fc[2] = spu_unpack_A8R8G8B8(c2); - fc[3] = spu_unpack_A8R8G8B8(c3); + temp[0] = spu_unpack_A8R8G8B8(fbc0); + temp[1] = spu_unpack_A8R8G8B8(fbc1); + temp[2] = spu_unpack_A8R8G8B8(fbc2); + temp[3] = spu_unpack_A8R8G8B8(fbc3); break; default: ASSERT(0); } - _transpose_matrix4x4(fbRGBA, fc); + _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */ } /* @@ -384,21 +403,20 @@ spu_fallback_fragment_ops(uint x, uint y, #endif /* - * Pack float colors into 32-bit RGBA words. + * Pack fragment float colors into 32-bit RGBA words. */ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - c0 = spu_pack_A8R8G8B8(frag_aos[0]); - c1 = spu_pack_A8R8G8B8(frag_aos[1]); - c2 = spu_pack_A8R8G8B8(frag_aos[2]); - c3 = spu_pack_A8R8G8B8(frag_aos[3]); + fragc0 = spu_pack_A8R8G8B8(frag_aos[0]); + fragc1 = spu_pack_A8R8G8B8(frag_aos[1]); + fragc2 = spu_pack_A8R8G8B8(frag_aos[2]); + fragc3 = spu_pack_A8R8G8B8(frag_aos[3]); break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - c0 = spu_pack_B8G8R8A8(frag_aos[0]); - c1 = spu_pack_B8G8R8A8(frag_aos[1]); - c2 = spu_pack_B8G8R8A8(frag_aos[2]); - c3 = spu_pack_B8G8R8A8(frag_aos[3]); + fragc0 = spu_pack_B8G8R8A8(frag_aos[0]); + fragc1 = spu_pack_B8G8R8A8(frag_aos[1]); + fragc2 = spu_pack_B8G8R8A8(frag_aos[2]); + fragc3 = spu_pack_B8G8R8A8(frag_aos[3]); break; default: fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); @@ -407,20 +425,57 @@ spu_fallback_fragment_ops(uint x, uint y, /* - * Color masking + * Do color masking */ if (spu.blend.colormask != 0xf) { - /* XXX to do */ - /* apply color mask to 32-bit packed colors */ + uint cmask = 0x0; /* each byte corresponds to a color channel */ + + /* Form bitmask depending on color buffer format and colormask bits */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + if (spu.blend.colormask & (1<<0)) + cmask |= 0x00ff0000; /* red */ + if (spu.blend.colormask & (1<<1)) + cmask |= 0x0000ff00; /* green */ + if (spu.blend.colormask & (1<<2)) + cmask |= 0x000000ff; /* blue */ + if (spu.blend.colormask & (1<<3)) + cmask |= 0xff000000; /* alpha */ + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + if (spu.blend.colormask & (1<<0)) + cmask |= 0x0000ff00; /* red */ + if (spu.blend.colormask & (1<<1)) + cmask |= 0x00ff0000; /* green */ + if (spu.blend.colormask & (1<<2)) + cmask |= 0xff000000; /* blue */ + if (spu.blend.colormask & (1<<3)) + cmask |= 0x000000ff; /* alpha */ + break; + default: + ASSERT(0); + } + + /* + * Apply color mask to the 32-bit packed colors. + * if (cmask[i]) + * frag color[i] = frag color[i]; + * else + * frag color[i] = framebuffer color[i]; + */ + fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask); + fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask); + fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask); + fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask); } /* - * Logic Ops + * Do logic ops */ if (spu.blend.logicop_enable) { /* XXX to do */ - /* apply logicop to 32-bit packed colors */ + /* apply logicop to 32-bit packed colors (fragcx and fbcx) */ } @@ -431,45 +486,46 @@ spu_fallback_fragment_ops(uint x, uint y, spu.cur_ctile_status = TILE_STATUS_DIRTY; } else { + /* write no fragments */ return; } /* - * Write new quad colors to the framebuffer/tile. + * Write new fragment/quad colors to the framebuffer/tile. * Only write pixels where the corresponding mask word is set. */ #if LINEAR_QUAD_LAYOUT /* * Quad layout: * +--+--+--+--+ - * |p0|p1|p2|p3| + * |p0|p1|p2|p3|... * +--+--+--+--+ */ if (spu_extract(mask, 0)) - colorTile->ui[y][x*2] = c0; + colorTile->ui[y][x*2] = fragc0; if (spu_extract(mask, 1)) - colorTile->ui[y][x*2+1] = c1; + colorTile->ui[y][x*2+1] = fragc1; if (spu_extract(mask, 2)) - colorTile->ui[y][x*2+2] = c2; + colorTile->ui[y][x*2+2] = fragc2; if (spu_extract(mask, 3)) - colorTile->ui[y][x*2+3] = c3; + colorTile->ui[y][x*2+3] = fragc3; #else /* * Quad layout: * +--+--+ - * |p0|p1| + * |p0|p1|... * +--+--+ - * |p2|p3| + * |p2|p3|... * +--+--+ */ if (spu_extract(mask, 0)) - colorTile->ui[y+0][x+0] = c0; + colorTile->ui[y+0][x+0] = fragc0; if (spu_extract(mask, 1)) - colorTile->ui[y+0][x+1] = c1; + colorTile->ui[y+0][x+1] = fragc1; if (spu_extract(mask, 2)) - colorTile->ui[y+1][x+0] = c2; + colorTile->ui[y+1][x+0] = fragc2; if (spu_extract(mask, 3)) - colorTile->ui[y+1][x+1] = c3; + colorTile->ui[y+1][x+1] = fragc3; #endif } diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 8b93878192..0a8fb56a62 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -241,6 +241,19 @@ eval_coeff(uint slot, float x, float y, vector float result[4]) } +/** + * As above, but return 4 vectors in SOA format. + * XXX this will all be re-written someday. + */ +static INLINE void +eval_coeff_soa(uint slot, float x, float y, vector float result[4]) +{ + eval_coeff(slot, x, y, result); + _transpose_matrix4x4(result, result); +} + + + static INLINE vector float eval_z(float x, float y) { @@ -267,14 +280,17 @@ emit_quad( int x, int y, mask_t mask ) if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; const int iy = y - setup.cliprect_miny; - vector float colors[4]; spu.cur_ctile_status = TILE_STATUS_DIRTY; spu.cur_ztile_status = TILE_STATUS_DIRTY; if (spu.texture[0].start) { - /* texture mapping */ + /* + * Temporary texture mapping path + * This will go away when fragment programs support TEX inst. + */ const uint unit = 0; + vector float colors[4]; vector float texcoords[4]; eval_coeff(2, (float) x, (float) y, texcoords); @@ -311,70 +327,60 @@ emit_quad( int x, int y, mask_t mask ) colors[3] = spu_mul(colors[3], colors1[3]); } + { + /* Convert fragment data from AoS to SoA format. + * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) + * This is temporary! + */ + vector float soa_frag[4]; + _transpose_matrix4x4(soa_frag, colors); + + vector float fragZ = eval_z((float) x, (float) y); + + /* Do all per-fragment/quad operations here, including: + * alpha test, z test, stencil test, blend and framebuffer writing. + */ + spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, + fragZ, + soa_frag[0], soa_frag[1], + soa_frag[2], soa_frag[3], + mask); + } + } else { - /* simple shading */ -#if 0 - eval_coeff(1, (float) x, (float) y, colors); + /* + * Run fragment shader, execute per-fragment ops, update fb/tile. + */ + vector float inputs[4*4], outputs[2*4]; + vector float fragZ = eval_z((float) x, (float) y); + /* setup inputs */ +#if 0 + eval_coeff_soa(1, (float) x, (float) y, inputs); #else - /* XXX new fragment program code */ - - if (spu.fragment_program) { - vector float inputs[4*4], outputs[2*4]; - - /* setup inputs */ - eval_coeff(1, (float) x, (float) y, inputs); - - /* Execute the current fragment program */ - spu.fragment_program(inputs, outputs, spu.constants); - - /* Copy outputs */ - colors[0] = outputs[0*4+0]; - colors[1] = outputs[0*4+1]; - colors[2] = outputs[0*4+2]; - colors[3] = outputs[0*4+3]; - - if (0 && spu.init.id==0 && y == 48) { - printf("colors[0] = %f %f %f %f\n", - spu_extract(colors[0], 0), - spu_extract(colors[0], 1), - spu_extract(colors[0], 2), - spu_extract(colors[0], 3)); - printf("colors[1] = %f %f %f %f\n", - spu_extract(colors[1], 0), - spu_extract(colors[1], 1), - spu_extract(colors[1], 2), - spu_extract(colors[1], 3)); - } - + uint i; + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + eval_coeff_soa(i+1, (float) x, (float) y, inputs + i * 4); } #endif - } + ASSERT(spu.fragment_program); + ASSERT(spu.fragment_ops); + /* Execute the current fragment program */ + spu.fragment_program(inputs, outputs, spu.constants); - { - /* Convert fragment data from AoS to SoA format. - * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) - * This is temporary! - */ - vector float soa_frag[4]; - _transpose_matrix4x4(soa_frag, colors); - - float4 fragZ; - - fragZ.v = eval_z((float) x, (float) y); - - /* Do all per-fragment/quad operations here, including: - * alpha test, z test, stencil test, blend and framebuffer writing. + /* Execute per-fragment/quad operations, including: + * alpha test, z test, stencil test, blend and framebuffer writing. */ spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, - fragZ.v, - soa_frag[0], soa_frag[1], - soa_frag[2], soa_frag[3], + fragZ, + outputs[0*4+0], + outputs[0*4+1], + outputs[0*4+2], + outputs[0*4+3], mask); } - } } |