From c2372cc7481bf3985a6a3126952ab9d5dab4bf77 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 28 Jan 2008 17:22:12 -0700 Subject: Cell: initial texture cache/sampling code --- src/mesa/pipe/cell/spu/spu_texture.c | 139 +++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 src/mesa/pipe/cell/spu/spu_texture.c (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c new file mode 100644 index 0000000000..6d566a5006 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -0,0 +1,139 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" + + +/** + * Number of texture tiles to cache. + * Note that this will probably be the largest consumer of SPU local store/ + * memory for this driver! + */ +#define CACHE_SIZE 16 + +static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB; + +static int tex_tile_x[CACHE_SIZE], tex_tile_y[CACHE_SIZE]; + + + +/** + * Mark all tex cache entries as invalid. + */ +void +invalidate_tex_cache(void) +{ + /* XXX memset? */ + uint i; + for (i = 0; i < CACHE_SIZE; i++) + tex_tile_x[i] = tex_tile_y[i] = -1; +} + + +/** + * Return the cache pos/index which corresponds to texel (i,j) + */ +static INLINE uint +cache_pos(uint i, uint j) +{ + uint tx = i / TILE_SIZE; + uint ty = j / TILE_SIZE; + uint pos = (tx + ty * 4) % CACHE_SIZE; + return pos; +} + + +/** + * Make sure the tile for texel (i,j) is present, return its position/index + * in the cache. 
+ */ +static uint +get_tex_tile(uint i, uint j) +{ + const int tx = i / TILE_SIZE; + const int ty = j / TILE_SIZE; + const uint pos = cache_pos(i, j); + + if (tex_tile_x[pos] != tx || tex_tile_y[pos] != ty) { + /* texture cache miss, fetch tile from main memory */ + const uint tiles_per_row = spu.texture.width / TILE_SIZE; + const uint bytes_per_tile = sizeof(tile_t); + const void *src = (const ubyte *) spu.texture.start + + (ty * tiles_per_row + tx) * bytes_per_tile; + + printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n", + spu.init.id, tx, ty, pos, + tex_tile_x[pos], tex_tile_y[pos]); +#if 0 + printf("SPU %u: get tex tile from %p to %p\n", + spu.init.id, src, tex_tiles[pos].t32); +#endif + + ASSERT_ALIGN16(tex_tiles[pos].t32); + ASSERT_ALIGN16(src); + + mfc_get(tex_tiles[pos].t32, /* dest */ + (unsigned int) src, + bytes_per_tile, /* size */ + TAG_TEXTURE_TILE, + 0, /* tid */ + 0 /* rid */); + + wait_on_mask(1 << TAG_TEXTURE_TILE); + + tex_tile_x[pos] = tx; + tex_tile_y[pos] = ty; + } + else { +#if 0 + printf("SPU %u: tex cache HIT at %d, %d\n", + spu.init.id, tx, ty); +#endif + } + + return pos; +} + + +/** + * Get texture sample at texcoord. + * XXX this is extremely primitive for now. + */ +uint +sample_texture(const float *texcoord) +{ + /* wrap/repeat */ + uint i = (uint) (texcoord[0] * spu.texture.width) % spu.texture.width; + uint j = (uint) (texcoord[1] * spu.texture.height) % spu.texture.height; + uint pos = get_tex_tile(i, j); + uint texel = tex_tiles[pos].t32[j % TILE_SIZE][i % TILE_SIZE]; + return texel; +} -- cgit v1.2.3 From 24f0e54c1b9ff43dcb75758c8e0faba355c0617c Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 30 Jan 2008 15:26:51 -0700 Subject: Cell: start to SIMD-ize triangle attribute interpolation Using the spu_add(), etc intrinsics. About a 15% speed-up with some tests. 
--- src/mesa/pipe/cell/spu/spu_main.h | 7 ++ src/mesa/pipe/cell/spu/spu_texture.c | 6 +- src/mesa/pipe/cell/spu/spu_texture.h | 2 +- src/mesa/pipe/cell/spu/spu_tri.c | 126 +++++++++++++++++++---------------- 4 files changed, 79 insertions(+), 62 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_main.h b/src/mesa/pipe/cell/spu/spu_main.h index 8908bf8bc0..73f9ed29d6 100644 --- a/src/mesa/pipe/cell/spu/spu_main.h +++ b/src/mesa/pipe/cell/spu/spu_main.h @@ -36,6 +36,13 @@ #include "pipe/p_state.h" +typedef union +{ + vector float v; + float f[4]; +} float4; + + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index 6d566a5006..7a1ca097c0 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -128,11 +128,11 @@ get_tex_tile(uint i, uint j) * XXX this is extremely primitive for now. 
*/ uint -sample_texture(const float *texcoord) +sample_texture(float4 texcoord) { /* wrap/repeat */ - uint i = (uint) (texcoord[0] * spu.texture.width) % spu.texture.width; - uint j = (uint) (texcoord[1] * spu.texture.height) % spu.texture.height; + uint i = (uint) (texcoord.f[0] * spu.texture.width) % spu.texture.width; + uint j = (uint) (texcoord.f[1] * spu.texture.height) % spu.texture.height; uint pos = get_tex_tile(i, j); uint texel = tex_tiles[pos].t32[j % TILE_SIZE][i % TILE_SIZE]; return texel; diff --git a/src/mesa/pipe/cell/spu/spu_texture.h b/src/mesa/pipe/cell/spu/spu_texture.h index b75b7ac44f..938a42b549 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.h +++ b/src/mesa/pipe/cell/spu/spu_texture.h @@ -37,7 +37,7 @@ invalidate_tex_cache(void); extern uint -sample_texture(const float *texcoord); +sample_texture(float4 texcoord); #endif /* SPU_TEXTURE_H */ diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c index 1c615a6e6a..4fc6d90895 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.c +++ b/src/mesa/pipe/cell/spu/spu_tri.c @@ -81,9 +81,9 @@ struct edge { struct interp_coef { - float a0[4]; - float dadx[4]; - float dady[4]; + float4 a0; + float4 dadx; + float4 dady; }; @@ -201,36 +201,31 @@ clip_emit_quad(struct setup_stage *setup) * Eg: four colors will be compute. 
*/ static INLINE void -eval_coeff(uint slot, float x, float y, float result[4][4]) +eval_coeff(uint slot, float x, float y, float4 result[4]) { switch (spu.vertex_info.interp_mode[slot]) { case INTERP_CONSTANT: - { - uint i; - for (i = 0; i < 4; i++) { - result[QUAD_TOP_LEFT][i] = - result[QUAD_TOP_RIGHT][i] = - result[QUAD_BOTTOM_LEFT][i] = - result[QUAD_BOTTOM_RIGHT][i] = setup.coef[slot].a0[i]; - } - } + result[QUAD_TOP_LEFT] = + result[QUAD_TOP_RIGHT] = + result[QUAD_BOTTOM_LEFT] = + result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0; break; case INTERP_LINEAR: /* fall-through, for now */ default: { - uint i; - const float *dadx = setup.coef[slot].dadx; - const float *dady = setup.coef[slot].dady; - - /* loop over XYZW comps */ - for (i = 0; i < 4; i++) { - result[QUAD_TOP_LEFT][i] = setup.coef[slot].a0[i] + x * dadx[i] + y * dady[i]; - result[QUAD_TOP_RIGHT][i] = result[0][i] + dadx[i]; - result[QUAD_BOTTOM_LEFT][i] = result[0][i] + dady[i]; - result[QUAD_BOTTOM_RIGHT][i] = result[0][i] + dadx[i] + dady[i]; - } + register vector float dadx = setup.coef[slot].dadx.v; + register vector float dady = setup.coef[slot].dady.v; + register vector float topLeft + = spu_add(setup.coef[slot].a0.v, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + result[QUAD_TOP_LEFT].v = topLeft; + result[QUAD_TOP_RIGHT].v = spu_add(topLeft, dadx); + result[QUAD_BOTTOM_LEFT].v = spu_add(topLeft, dady); + result[QUAD_BOTTOM_RIGHT].v = spu_add(spu_add(topLeft, dadx), dady); } } } @@ -240,28 +235,46 @@ static INLINE void eval_z(float x, float y, float result[4]) { const uint slot = 0; - const uint i = 2; - const float *dadx = setup.coef[slot].dadx; - const float *dady = setup.coef[slot].dady; - - result[QUAD_TOP_LEFT] = setup.coef[slot].a0[i] + x * dadx[i] + y * dady[i]; - result[QUAD_TOP_RIGHT] = result[0] + dadx[i]; - result[QUAD_BOTTOM_LEFT] = result[0] + dady[i]; - result[QUAD_BOTTOM_RIGHT] = result[0] + dadx[i] + dady[i]; + const float dzdx = 
setup.coef[slot].dadx.f[2]; + const float dzdy = setup.coef[slot].dady.f[2]; + const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy; +#if 1 + result[QUAD_TOP_LEFT] = topLeft; + result[QUAD_TOP_RIGHT] = topLeft + dzdx; + result[QUAD_BOTTOM_LEFT] = topLeft + dzdy; + result[QUAD_BOTTOM_RIGHT] = topLeft + dzdx + dzdy; +#else + /* XXX vectorize */ + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs + = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; + vector float *res = (vector float *) result; + *res = spu_add(topLeftv, derivs); +#endif } -static INLINE uint -pack_color(const float color[4]) +static INLINE void +pack_colors(uint uicolors[4], const float4 fcolors[4]) { + /* XXX grab the code for _pack_rgba8() and use the shuffle + * command to do the swizzling seen here. + */ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - return _pack_rgba8(color[3], color[0], color[1], color[2]); + uicolors[0] = _pack_rgba8(fcolors[0].f[3], fcolors[0].f[0], fcolors[0].f[1], fcolors[0].f[2]); + uicolors[1] = _pack_rgba8(fcolors[1].f[3], fcolors[1].f[0], fcolors[1].f[1], fcolors[1].f[2]); + uicolors[2] = _pack_rgba8(fcolors[2].f[3], fcolors[2].f[0], fcolors[2].f[1], fcolors[2].f[2]); + uicolors[3] = _pack_rgba8(fcolors[3].f[3], fcolors[0].f[0], fcolors[3].f[1], fcolors[3].f[2]); + break; case PIPE_FORMAT_B8G8R8A8_UNORM: - return _pack_rgba8(color[2], color[1], color[0], color[3]); + uicolors[0] = _pack_rgba8(fcolors[0].f[2], fcolors[0].f[1], fcolors[0].f[0], fcolors[0].f[3]); + uicolors[1] = _pack_rgba8(fcolors[1].f[2], fcolors[1].f[1], fcolors[1].f[0], fcolors[1].f[3]); + uicolors[2] = _pack_rgba8(fcolors[2].f[2], fcolors[2].f[1], fcolors[2].f[0], fcolors[2].f[3]); + uicolors[3] = _pack_rgba8(fcolors[3].f[2], fcolors[3].f[1], fcolors[3].f[0], fcolors[3].f[3]); + break; default: ASSERT(0); - return 0; } } @@ -379,7 +392,7 @@ emit_quad( int x, int y, unsigned mask ) uint colors[4]; /* indexed by QUAD_x */ if 
(spu.texture.start) { - float texcoords[4][4]; + float4 texcoords[4]; uint i; eval_coeff(2, (float) x, (float) y, texcoords); for (i = 0; i < 4; i++) { @@ -387,12 +400,9 @@ emit_quad( int x, int y, unsigned mask ) } } else { - float fcolors[4][4]; + float4 fcolors[4]; eval_coeff(1, (float) x, (float) y, fcolors); - colors[QUAD_TOP_LEFT] = pack_color(fcolors[QUAD_TOP_LEFT]); - colors[QUAD_TOP_RIGHT] = pack_color(fcolors[QUAD_TOP_RIGHT]); - colors[QUAD_BOTTOM_LEFT] = pack_color(fcolors[QUAD_BOTTOM_LEFT]); - colors[QUAD_BOTTOM_RIGHT] = pack_color(fcolors[QUAD_BOTTOM_RIGHT]); + pack_colors(colors, fcolors); } if (spu.depth_stencil.depth.enabled) { @@ -645,12 +655,12 @@ static void const_coeff(uint slot) ASSERT(slot < PIPE_MAX_SHADER_INPUTS); for (i = 0; i < 4; i++) { - setup.coef[slot].dadx[i] = 0; - setup.coef[slot].dady[i] = 0; + setup.coef[slot].dadx.f[i] = 0; + setup.coef[slot].dady.f[i] = 0; /* need provoking vertex info! */ - setup.coef[slot].a0[i] = setup.vprovoke->data[slot][i]; + setup.coef[slot].a0.f[i] = setup.vprovoke->data[slot][i]; } } @@ -670,8 +680,8 @@ static void tri_linear_coeff( uint slot, uint firstComp, uint lastComp ) ASSERT(slot < PIPE_MAX_SHADER_INPUTS); - setup.coef[slot].dadx[i] = a * setup.oneoverarea; - setup.coef[slot].dady[i] = b * setup.oneoverarea; + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at @@ -685,17 +695,17 @@ static void tri_linear_coeff( uint slot, uint firstComp, uint lastComp ) * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. 
*/ - setup.coef[slot].a0[i] = (setup.vmin->data[slot][i] - - (setup.coef[slot].dadx[i] * (setup.vmin->data[0][0] - 0.5f) + - setup.coef[slot].dady[i] * (setup.vmin->data[0][1] - 0.5f))); + setup.coef[slot].a0.f[i] = (setup.vmin->data[slot][i] - + (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + + setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); } /* _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", slot, "xyzw"[i], setup.coef[slot].a0[i], - setup.coef[slot].dadx[i], - setup.coef[slot].dady[i]); + setup.coef[slot].dadx.f[i], + setup.coef[slot].dady.f[i]); */ } @@ -734,11 +744,11 @@ static void tri_persp_coeff( unsigned slot, assert(slot < PIPE_MAX_SHADER_INPUTS); assert(i <= 3); - setup.coef[slot].dadx[i] = a * setup.oneoverarea; - setup.coef[slot].dady[i] = b * setup.oneoverarea; - setup.coef[slot].a0[i] = (mina - - (setup.coef[slot].dadx[i] * (setup.vmin->data[0][0] - 0.5f) + - setup.coef[slot].dady[i] * (setup.vmin->data[0][1] - 0.5f))); + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; + setup.coef[slot].a0.f[i] = (mina - + (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + + setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); } #endif -- cgit v1.2.3 From c392cc8f1bcaaecc2cc723fc5550e5f6462602f3 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 1 Feb 2008 13:49:51 -0700 Subject: Cell: rename fields of the tile_t union --- src/mesa/pipe/cell/spu/spu_main.c | 8 ++++++++ src/mesa/pipe/cell/spu/spu_texture.c | 6 +++--- src/mesa/pipe/cell/spu/spu_tile.c | 4 ++-- src/mesa/pipe/cell/spu/spu_tile.h | 18 +++++------------- src/mesa/pipe/cell/spu/spu_tri.c | 8 ++++---- 5 files changed, 22 insertions(+), 22 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index d6393048f5..7d6e910ad5 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c 
@@ -36,6 +36,7 @@ #include "spu_render.h" #include "spu_texture.h" #include "spu_tile.h" +//#include "spu_test.h" #include "spu_vertex_shader.h" #include "pipe/cell/common.h" #include "pipe/p_defines.h" @@ -495,6 +496,7 @@ one_time_init(void) } + /* In some versions of the SDK the SPE main takes 'unsigned long' as a * parameter. In others it takes 'unsigned long long'. Use a define to * select between the two. @@ -515,6 +517,8 @@ main(main_param_t speid, main_param_t argp) (void) speid; + ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); + one_time_init(); if (Debug) @@ -528,6 +532,10 @@ main(main_param_t speid, main_param_t argp) 0 /* rid */); wait_on_mask( 1 << tag ); +#if 0 + if (spu.init.id==0) + spu_test_misc(); +#endif main_loop(); diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index 7a1ca097c0..c1dc6bfe90 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -97,10 +97,10 @@ get_tex_tile(uint i, uint j) spu.init.id, src, tex_tiles[pos].t32); #endif - ASSERT_ALIGN16(tex_tiles[pos].t32); + ASSERT_ALIGN16(tex_tiles[pos].ui); ASSERT_ALIGN16(src); - mfc_get(tex_tiles[pos].t32, /* dest */ + mfc_get(tex_tiles[pos].ui, /* dest */ (unsigned int) src, bytes_per_tile, /* size */ TAG_TEXTURE_TILE, @@ -134,6 +134,6 @@ sample_texture(float4 texcoord) uint i = (uint) (texcoord.f[0] * spu.texture.width) % spu.texture.width; uint j = (uint) (texcoord.f[1] * spu.texture.height) % spu.texture.height; uint pos = get_tex_tile(i, j); - uint texel = tex_tiles[pos].t32[j % TILE_SIZE][i % TILE_SIZE]; + uint texel = tex_tiles[pos].ui[j % TILE_SIZE][i % TILE_SIZE]; return texel; } diff --git a/src/mesa/pipe/cell/spu/spu_tile.c b/src/mesa/pipe/cell/spu/spu_tile.c index aea4785bc2..fd65c2b49c 100644 --- a/src/mesa/pipe/cell/spu/spu_tile.c +++ b/src/mesa/pipe/cell/spu/spu_tile.c @@ -56,7 +56,7 @@ get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) printf("get_tile: dest: %p src: 0x%x size: 
%d\n", tile, (unsigned int) src, bytesPerTile); */ - mfc_get(tile->t32, /* dest in local memory */ + mfc_get(tile->ui, /* dest in local memory */ (unsigned int) src, /* src in main memory */ bytesPerTile, tag, @@ -82,7 +82,7 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) spu.init.id, tile, (unsigned int) dst, bytesPerTile); */ - mfc_put((void *) tile->t32, /* src in local memory */ + mfc_put((void *) tile->ui, /* src in local memory */ (unsigned int) dst, /* dst in main memory */ bytesPerTile, tag, diff --git a/src/mesa/pipe/cell/spu/spu_tile.h b/src/mesa/pipe/cell/spu/spu_tile.h index 4b1ef2a4c8..85a0d55807 100644 --- a/src/mesa/pipe/cell/spu/spu_tile.h +++ b/src/mesa/pipe/cell/spu/spu_tile.h @@ -40,8 +40,8 @@ typedef union { - ushort t16[TILE_SIZE][TILE_SIZE]; - uint t32[TILE_SIZE][TILE_SIZE]; + ushort us[TILE_SIZE][TILE_SIZE]; + uint ui[TILE_SIZE][TILE_SIZE]; vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; } tile_t; @@ -74,7 +74,7 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); static INLINE void clear_c_tile(tile_t *ctile) { - memset32((uint*) ctile->t32, + memset32((uint*) ctile->ui, spu.fb.color_clear_value, TILE_SIZE * TILE_SIZE); } @@ -84,23 +84,15 @@ static INLINE void clear_z_tile(tile_t *ztile) { if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - memset16((ushort*) ztile->t16, + memset16((ushort*) ztile->us, spu.fb.depth_clear_value, TILE_SIZE * TILE_SIZE); } else { ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); -#if SIMD_Z - union fi z; - z.f = 1.0; - memset32((uint*) ztile->t32, - z.i,/*spu.fb.depth_clear_value,*/ - TILE_SIZE * TILE_SIZE); -#else - memset32((uint*) ztile->t32, + memset32((uint*) ztile->ui, spu.fb.depth_clear_value, TILE_SIZE * TILE_SIZE); -#endif } } diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c index a26a4f098d..b04b6841c0 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.c +++ 
b/src/mesa/pipe/cell/spu/spu_tri.c @@ -349,13 +349,13 @@ emit_quad( int x, int y, mask_t mask ) cur_tile_status_c = TILE_STATUS_DIRTY; if (spu_extract(mask, 0)) - ctile.t32[iy][ix] = colors[QUAD_TOP_LEFT]; + ctile.ui[iy][ix] = colors[QUAD_TOP_LEFT]; if (spu_extract(mask, 1)) - ctile.t32[iy][ix+1] = colors[QUAD_TOP_RIGHT]; + ctile.ui[iy][ix+1] = colors[QUAD_TOP_RIGHT]; if (spu_extract(mask, 2)) - ctile.t32[iy+1][ix] = colors[QUAD_BOTTOM_LEFT]; + ctile.ui[iy+1][ix] = colors[QUAD_BOTTOM_LEFT]; if (spu_extract(mask, 3)) - ctile.t32[iy+1][ix+1] = colors[QUAD_BOTTOM_RIGHT]; + ctile.ui[iy+1][ix+1] = colors[QUAD_BOTTOM_RIGHT]; #if 0 /* SIMD_Z with swizzled color buffer (someday) */ -- cgit v1.2.3 From 18105195a86b8294b578462febf47692832e8705 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 09:54:21 -0700 Subject: Cell: checkpoint: start to SIMD-ize texture sampling --- src/mesa/pipe/cell/spu/spu_main.c | 10 ++++++++++ src/mesa/pipe/cell/spu/spu_main.h | 4 ++++ src/mesa/pipe/cell/spu/spu_texture.c | 17 ++++++++++++++--- src/mesa/pipe/cell/spu/spu_texture.h | 2 +- src/mesa/pipe/cell/spu/spu_tri.c | 8 ++++---- 5 files changed, 33 insertions(+), 8 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index ba4d180cc0..412661061a 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c @@ -263,6 +263,16 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.init.id, texture->start, texture->width, texture->height); memcpy(&spu.texture, texture, sizeof(*texture)); + spu.tex_size = VEC_LITERAL(vector float, + spu.texture.width, + spu.texture.height, + 0.0, + 0.0); + spu.tex_size_mask = VEC_LITERAL(vector unsigned int, + spu.texture.width - 1, + spu.texture.height - 1, + 0, + 0); } diff --git a/src/mesa/pipe/cell/spu/spu_main.h b/src/mesa/pipe/cell/spu/spu_main.h index 7a12715b0b..02b62ee5cd 100644 --- 
a/src/mesa/pipe/cell/spu/spu_main.h +++ b/src/mesa/pipe/cell/spu/spu_main.h @@ -110,6 +110,10 @@ struct spu_global /** for converting RGBA to PIPE_FORMAT_x colors */ vector unsigned char color_shuffle; + + vector float tex_size; + vector unsigned int tex_size_mask; /**< == int(size - 1) */ + } ALIGN16_ATTRIB; diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index c1dc6bfe90..1cf958806f 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -128,12 +128,23 @@ get_tex_tile(uint i, uint j) * XXX this is extremely primitive for now. */ uint -sample_texture(float4 texcoord) +sample_texture(vector float texcoord) { +#if 0 /* wrap/repeat */ - uint i = (uint) (texcoord.f[0] * spu.texture.width) % spu.texture.width; - uint j = (uint) (texcoord.f[1] * spu.texture.height) % spu.texture.height; + uint i = (uint) (spu_extract(texcoord, 0) * spu.texture.width) % spu.texture.width; + uint j = (uint) (spu_extract(texcoord, 1) * spu.texture.height) % spu.texture.height; uint pos = get_tex_tile(i, j); uint texel = tex_tiles[pos].ui[j % TILE_SIZE][i % TILE_SIZE]; return texel; +#else + vector float tc = spu_mul(texcoord, spu.tex_size); + vector unsigned int itc = spu_convtu(tc, 0); + itc = spu_and(itc, spu.tex_size_mask); + uint i = spu_extract(itc, 0); + uint j = spu_extract(itc, 1); + uint pos = get_tex_tile(i, j); + uint texel = tex_tiles[pos].ui[j % TILE_SIZE][i % TILE_SIZE]; + return texel; +#endif } diff --git a/src/mesa/pipe/cell/spu/spu_texture.h b/src/mesa/pipe/cell/spu/spu_texture.h index 938a42b549..5bc8e71879 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.h +++ b/src/mesa/pipe/cell/spu/spu_texture.h @@ -37,7 +37,7 @@ invalidate_tex_cache(void); extern uint -sample_texture(float4 texcoord); +sample_texture(vector float texcoord); #endif /* SPU_TEXTURE_H */ diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c index 3f46e75d7c..c148c75dd6 100644 --- 
a/src/mesa/pipe/cell/spu/spu_tri.c +++ b/src/mesa/pipe/cell/spu/spu_tri.c @@ -309,13 +309,13 @@ emit_quad( int x, int y, mask_t mask ) eval_coeff(2, (float) x, (float) y, texcoords); if (spu_extract(mask, 0)) - spu.ctile.ui[iy][ix] = sample_texture(texcoords[0]); + spu.ctile.ui[iy][ix] = sample_texture(texcoords[0].v); if (spu_extract(mask, 1)) - spu.ctile.ui[iy][ix+1] = sample_texture(texcoords[1]); + spu.ctile.ui[iy][ix+1] = sample_texture(texcoords[1].v); if (spu_extract(mask, 2)) - spu.ctile.ui[iy+1][ix] = sample_texture(texcoords[2]); + spu.ctile.ui[iy+1][ix] = sample_texture(texcoords[2].v); if (spu_extract(mask, 3)) - spu.ctile.ui[iy+1][ix+1] = sample_texture(texcoords[3]); + spu.ctile.ui[iy+1][ix+1] = sample_texture(texcoords[3].v); } else { /* simple shading */ -- cgit v1.2.3 From 703a8691553386242bf3d6662c314fc35b617194 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 11:02:47 -0700 Subject: Cell: SIMD-ize more of texture sampling --- src/mesa/pipe/cell/spu/spu_texture.c | 66 ++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 37 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index 1cf958806f..b52df970d0 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -26,6 +26,8 @@ **************************************************************************/ +#include + #include "pipe/p_compiler.h" #include "spu_main.h" #include "spu_texture.h" @@ -41,7 +43,7 @@ static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB; -static int tex_tile_x[CACHE_SIZE], tex_tile_y[CACHE_SIZE]; +static vector unsigned int tex_tile_xy[CACHE_SIZE]; @@ -53,20 +55,19 @@ invalidate_tex_cache(void) { /* XXX memset? 
*/ uint i; - for (i = 0; i < CACHE_SIZE; i++) - tex_tile_x[i] = tex_tile_y[i] = -1; + for (i = 0; i < CACHE_SIZE; i++) { + tex_tile_xy[i] = VEC_LITERAL(vector unsigned int, ~0U, ~0U, ~0U, ~0U); + } } /** - * Return the cache pos/index which corresponds to texel (i,j) + * Return the cache pos/index which corresponds to tile (tx,ty) */ static INLINE uint -cache_pos(uint i, uint j) +cache_pos(vector unsigned int txty) { - uint tx = i / TILE_SIZE; - uint ty = j / TILE_SIZE; - uint pos = (tx + ty * 4) % CACHE_SIZE; + uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE; return pos; } @@ -76,26 +77,28 @@ cache_pos(uint i, uint j) * in the cache. */ static uint -get_tex_tile(uint i, uint j) +get_tex_tile(vector unsigned int ij) { - const int tx = i / TILE_SIZE; - const int ty = j / TILE_SIZE; - const uint pos = cache_pos(i, j); + /* tile address: tx,ty */ + const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */ + const uint pos = cache_pos(txty); + + if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) || + (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) { - if (tex_tile_x[pos] != tx || tex_tile_y[pos] != ty) { /* texture cache miss, fetch tile from main memory */ const uint tiles_per_row = spu.texture.width / TILE_SIZE; const uint bytes_per_tile = sizeof(tile_t); const void *src = (const ubyte *) spu.texture.start - + (ty * tiles_per_row + tx) * bytes_per_tile; + + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile; printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n", - spu.init.id, tx, ty, pos, - tex_tile_x[pos], tex_tile_y[pos]); -#if 0 - printf("SPU %u: get tex tile from %p to %p\n", - spu.init.id, src, tex_tiles[pos].t32); -#endif + spu.init.id, + spu_extract(txty,0), + spu_extract(txty,1), + pos, + spu_extract(tex_tile_xy[pos],0), + spu_extract(tex_tile_xy[pos],1)); ASSERT_ALIGN16(tex_tiles[pos].ui); ASSERT_ALIGN16(src); @@ -109,8 +112,7 @@ get_tex_tile(uint i, uint j) 
wait_on_mask(1 << TAG_TEXTURE_TILE); - tex_tile_x[pos] = tx; - tex_tile_y[pos] = ty; + tex_tile_xy[pos] = txty; } else { #if 0 @@ -130,21 +132,11 @@ get_tex_tile(uint i, uint j) uint sample_texture(vector float texcoord) { -#if 0 - /* wrap/repeat */ - uint i = (uint) (spu_extract(texcoord, 0) * spu.texture.width) % spu.texture.width; - uint j = (uint) (spu_extract(texcoord, 1) * spu.texture.height) % spu.texture.height; - uint pos = get_tex_tile(i, j); - uint texel = tex_tiles[pos].ui[j % TILE_SIZE][i % TILE_SIZE]; - return texel; -#else vector float tc = spu_mul(texcoord, spu.tex_size); - vector unsigned int itc = spu_convtu(tc, 0); - itc = spu_and(itc, spu.tex_size_mask); - uint i = spu_extract(itc, 0); - uint j = spu_extract(itc, 1); - uint pos = get_tex_tile(i, j); - uint texel = tex_tiles[pos].ui[j % TILE_SIZE][i % TILE_SIZE]; + vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ + itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */ + vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */ + uint pos = get_tex_tile(itc); + uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; return texel; -#endif } -- cgit v1.2.3 From 0a45f7594870cb7296100fb5f5d5dc82888a467d Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 12:50:42 -0700 Subject: Cell: implement basic bilinear texture sampler --- src/mesa/pipe/cell/spu/spu_texture.c | 67 ++++++++++++++++++++++++++++++++++++ src/mesa/pipe/cell/spu/spu_texture.h | 4 +++ 2 files changed, 71 insertions(+) (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index b52df970d0..26a5eefc48 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -32,6 +32,7 @@ #include "spu_main.h" #include "spu_texture.h" #include "spu_tile.h" +#include "spu_colorpack.h" /** @@ -140,3 +141,69 @@ sample_texture(vector float texcoord) uint texel = 
tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; return texel; } + + +uint +sample_texture_bilinear(vector float texcoord) +{ + static const vector unsigned int offset10 = {1, 0, 0, 0}; + static const vector unsigned int offset01 = {0, 1, 0, 0}; + + vector float tc = spu_mul(texcoord, spu.tex_size); + /* itcST */ + vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ + vector unsigned int itc01 = spu_add(itc00, offset01); + vector unsigned int itc10 = spu_add(itc00, offset10); + vector unsigned int itc11 = spu_add(itc10, offset01); + + itc00 = spu_and(itc00, spu.tex_size_mask); /* mask (GL_REPEAT) */ + itc01 = spu_and(itc01, spu.tex_size_mask); /* mask (GL_REPEAT) */ + itc10 = spu_and(itc10, spu.tex_size_mask); /* mask (GL_REPEAT) */ + itc11 = spu_and(itc11, spu.tex_size_mask); /* mask (GL_REPEAT) */ + + /* intra tile addr */ + vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1); + vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1); + vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1); + vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1); + + uint pos00 = get_tex_tile(itc00); + uint pos01 = get_tex_tile(itc01); + uint pos10 = get_tex_tile(itc10); + uint pos11 = get_tex_tile(itc11); + + vector float texel00 = spu_unpack_color(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); + vector float texel01 = spu_unpack_color(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); + vector float texel10 = spu_unpack_color(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); + vector float texel11 = spu_unpack_color(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); + + /* Compute weighting factors in [0,1] + * Multiply texcoord by 1024, AND with 1023, convert back to float. 
+ */ + vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); + vector signed int itc1024 = spu_convts(tc1024, 0); + itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); + vector float weight = spu_convtf(itc1024, 10); + + /* smeared frac and 1-frac */ + vector float sfrac = spu_splats(spu_extract(weight, 0)); + vector float tfrac = spu_splats(spu_extract(weight, 1)); + vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); + vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); + + /* multiply the samples (colors) by the S/T weights */ + texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); + texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); + texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); + texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); + + /* compute sum of weighted samples */ + vector float texel_sum = spu_add(texel00, texel01); + texel_sum = spu_add(texel_sum, texel10); + texel_sum = spu_add(texel_sum, texel11); + + /* convert to uint color */ + uint texel = spu_pack_R8G8B8A8(texel_sum); + + return texel; +} diff --git a/src/mesa/pipe/cell/spu/spu_texture.h b/src/mesa/pipe/cell/spu/spu_texture.h index 5bc8e71879..25cbe9b3c6 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.h +++ b/src/mesa/pipe/cell/spu/spu_texture.h @@ -40,4 +40,8 @@ extern uint sample_texture(vector float texcoord); +extern uint +sample_texture_bilinear(vector float texcoord); + + #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From ca1d2fc5f6fb138025f6848591e3494e4b881930 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 13:16:10 -0700 Subject: Cell: improved bilinear filtering avoid calling get_tex_tile() if all texels are in same tile --- src/mesa/pipe/cell/spu/spu_texture.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index 26a5eefc48..6e243f7fa3 100644 --- 
a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -150,16 +150,17 @@ sample_texture_bilinear(vector float texcoord) static const vector unsigned int offset01 = {0, 1, 0, 0}; vector float tc = spu_mul(texcoord, spu.tex_size); - /* itcST */ + /* integer texcoords S,T: */ vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ vector unsigned int itc01 = spu_add(itc00, offset01); vector unsigned int itc10 = spu_add(itc00, offset10); vector unsigned int itc11 = spu_add(itc10, offset01); - itc00 = spu_and(itc00, spu.tex_size_mask); /* mask (GL_REPEAT) */ - itc01 = spu_and(itc01, spu.tex_size_mask); /* mask (GL_REPEAT) */ - itc10 = spu_and(itc10, spu.tex_size_mask); /* mask (GL_REPEAT) */ - itc11 = spu_and(itc11, spu.tex_size_mask); /* mask (GL_REPEAT) */ + /* mask (GL_REPEAT) */ + itc00 = spu_and(itc00, spu.tex_size_mask); + itc01 = spu_and(itc01, spu.tex_size_mask); + itc10 = spu_and(itc10, spu.tex_size_mask); + itc11 = spu_and(itc11, spu.tex_size_mask); /* intra tile addr */ vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1); @@ -167,11 +168,21 @@ sample_texture_bilinear(vector float texcoord) vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1); vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1); + /* get tile cache positions */ uint pos00 = get_tex_tile(itc00); - uint pos01 = get_tex_tile(itc01); - uint pos10 = get_tex_tile(itc10); - uint pos11 = get_tex_tile(itc11); + uint pos01, pos10, pos11; + if ((spu_extract(ij00, 0) < TILE_SIZE-1) && + (spu_extract(ij00, 1) < TILE_SIZE-1)) { + /* all texels are in the same tile */ + pos01 = pos10 = pos11 = pos00; + } + else { + pos01 = get_tex_tile(itc01); + pos10 = get_tex_tile(itc10); + pos11 = get_tex_tile(itc11); + } + /* get texels from tiles and convert to float[4] */ vector float texel00 = spu_unpack_color(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); vector float texel01 = spu_unpack_color(tex_tiles[pos01].ui[spu_extract(ij01, 
1)][spu_extract(ij01, 0)]); vector float texel10 = spu_unpack_color(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); -- cgit v1.2.3 From 8f924e4df06a5d45dda338e7a0a87308e48df57e Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 13:23:07 -0700 Subject: Cell: choose bilinear vs. nearest filtering according to sampler state --- src/mesa/pipe/cell/spu/spu_main.c | 4 ++++ src/mesa/pipe/cell/spu/spu_main.h | 2 ++ src/mesa/pipe/cell/spu/spu_texture.c | 2 +- src/mesa/pipe/cell/spu/spu_texture.h | 2 +- src/mesa/pipe/cell/spu/spu_tri.c | 8 ++++---- 5 files changed, 12 insertions(+), 6 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index 412661061a..48e016fc8b 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c @@ -252,6 +252,10 @@ cmd_state_sampler(const struct pipe_sampler_state *state) spu.init.id); memcpy(&spu.sampler[0], state, sizeof(*state)); + if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR) + spu.sample_texture = sample_texture_bilinear; + else + spu.sample_texture = sample_texture_nearest; } diff --git a/src/mesa/pipe/cell/spu/spu_main.h b/src/mesa/pipe/cell/spu/spu_main.h index 02b62ee5cd..fb98b0d889 100644 --- a/src/mesa/pipe/cell/spu/spu_main.h +++ b/src/mesa/pipe/cell/spu/spu_main.h @@ -114,6 +114,8 @@ struct spu_global vector float tex_size; vector unsigned int tex_size_mask; /**< == int(size - 1) */ + uint (*sample_texture)(vector float texcoord); + } ALIGN16_ATTRIB; diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index 6e243f7fa3..ecacf2ec88 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -131,7 +131,7 @@ get_tex_tile(vector unsigned int ij) * XXX this is extremely primitive for now. 
*/ uint -sample_texture(vector float texcoord) +sample_texture_nearest(vector float texcoord) { vector float tc = spu_mul(texcoord, spu.tex_size); vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ diff --git a/src/mesa/pipe/cell/spu/spu_texture.h b/src/mesa/pipe/cell/spu/spu_texture.h index 25cbe9b3c6..0e000bfebf 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.h +++ b/src/mesa/pipe/cell/spu/spu_texture.h @@ -37,7 +37,7 @@ invalidate_tex_cache(void); extern uint -sample_texture(vector float texcoord); +sample_texture_nearest(vector float texcoord); extern uint diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c index c148c75dd6..7b422f71a8 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.c +++ b/src/mesa/pipe/cell/spu/spu_tri.c @@ -309,13 +309,13 @@ emit_quad( int x, int y, mask_t mask ) eval_coeff(2, (float) x, (float) y, texcoords); if (spu_extract(mask, 0)) - spu.ctile.ui[iy][ix] = sample_texture(texcoords[0].v); + spu.ctile.ui[iy][ix] = spu.sample_texture(texcoords[0].v); if (spu_extract(mask, 1)) - spu.ctile.ui[iy][ix+1] = sample_texture(texcoords[1].v); + spu.ctile.ui[iy][ix+1] = spu.sample_texture(texcoords[1].v); if (spu_extract(mask, 2)) - spu.ctile.ui[iy+1][ix] = sample_texture(texcoords[2].v); + spu.ctile.ui[iy+1][ix] = spu.sample_texture(texcoords[2].v); if (spu_extract(mask, 3)) - spu.ctile.ui[iy+1][ix+1] = sample_texture(texcoords[3].v); + spu.ctile.ui[iy+1][ix+1] = spu.sample_texture(texcoords[3].v); } else { /* simple shading */ -- cgit v1.2.3 From efa8e03a6f3f7c27b019d20cca93bf7e624d7035 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 16:06:51 -0700 Subject: Cell: texture sampler functions always return vector float now Texture colors look the same now, regardless of X display/pixel format --- src/mesa/pipe/cell/spu/spu_main.h | 2 +- src/mesa/pipe/cell/spu/spu_texture.c | 19 ++++++++----------- src/mesa/pipe/cell/spu/spu_texture.h | 4 ++-- src/mesa/pipe/cell/spu/spu_tri.c | 36 
++++++++++++++++++------------------ 4 files changed, 29 insertions(+), 32 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_main.h b/src/mesa/pipe/cell/spu/spu_main.h index b22d563551..cfd4d72729 100644 --- a/src/mesa/pipe/cell/spu/spu_main.h +++ b/src/mesa/pipe/cell/spu/spu_main.h @@ -115,7 +115,7 @@ struct spu_global vector float tex_size; vector unsigned int tex_size_mask; /**< == int(size - 1) */ - uint (*sample_texture)(vector float texcoord); + vector float (*sample_texture)(vector float texcoord); } ALIGN16_ATTRIB; diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index ecacf2ec88..9ee2b45e24 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -130,7 +130,7 @@ get_tex_tile(vector unsigned int ij) * Get texture sample at texcoord. * XXX this is extremely primitive for now. */ -uint +vector float sample_texture_nearest(vector float texcoord) { vector float tc = spu_mul(texcoord, spu.tex_size); @@ -139,11 +139,11 @@ sample_texture_nearest(vector float texcoord) vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */ uint pos = get_tex_tile(itc); uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; - return texel; + return spu_unpack_A8R8G8B8(texel); } -uint +vector float sample_texture_bilinear(vector float texcoord) { static const vector unsigned int offset10 = {1, 0, 0, 0}; @@ -183,10 +183,10 @@ sample_texture_bilinear(vector float texcoord) } /* get texels from tiles and convert to float[4] */ - vector float texel00 = spu_unpack_color(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); - vector float texel01 = spu_unpack_color(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); - vector float texel10 = spu_unpack_color(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); - vector float texel11 = 
spu_unpack_color(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); + vector float texel00 = spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); + vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); + vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); + vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); /* Compute weighting factors in [0,1] * Multiply texcoord by 1024, AND with 1023, convert back to float. @@ -213,8 +213,5 @@ sample_texture_bilinear(vector float texcoord) texel_sum = spu_add(texel_sum, texel10); texel_sum = spu_add(texel_sum, texel11); - /* convert to uint color */ - uint texel = spu_pack_R8G8B8A8(texel_sum); - - return texel; + return texel_sum; } diff --git a/src/mesa/pipe/cell/spu/spu_texture.h b/src/mesa/pipe/cell/spu/spu_texture.h index 0e000bfebf..95eb87080f 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.h +++ b/src/mesa/pipe/cell/spu/spu_texture.h @@ -36,11 +36,11 @@ extern void invalidate_tex_cache(void); -extern uint +extern vector float sample_texture_nearest(vector float texcoord); -extern uint +extern vector float sample_texture_bilinear(vector float texcoord); diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c index 89aaca9a72..4c6de56eda 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.c +++ b/src/mesa/pipe/cell/spu/spu_tri.c @@ -301,6 +301,8 @@ emit_quad( int x, int y, mask_t mask ) if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; const int iy = y - setup.cliprect_miny; + const vector unsigned char shuffle = spu.color_shuffle; + vector float colors[4]; spu.cur_ctile_status = TILE_STATUS_DIRTY; @@ -310,34 +312,32 @@ emit_quad( int x, int y, mask_t mask ) eval_coeff(2, (float) x, (float) y, texcoords); if (spu_extract(mask, 0)) - spu.ctile.ui[iy][ix] = 
spu.sample_texture(texcoords[0]); + colors[0] = spu.sample_texture(texcoords[0]); if (spu_extract(mask, 1)) - spu.ctile.ui[iy][ix+1] = spu.sample_texture(texcoords[1]); + colors[1] = spu.sample_texture(texcoords[1]); if (spu_extract(mask, 2)) - spu.ctile.ui[iy+1][ix] = spu.sample_texture(texcoords[2]); + colors[2] = spu.sample_texture(texcoords[2]); if (spu_extract(mask, 3)) - spu.ctile.ui[iy+1][ix+1] = spu.sample_texture(texcoords[3]); + colors[3] = spu.sample_texture(texcoords[3]); } else { /* simple shading */ - const vector unsigned char shuffle = spu.color_shuffle; - vector float colors[4]; eval_coeff(1, (float) x, (float) y, colors); + } -#if 0 - if (spu.blend.blend_enable) - blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors); +#if 1 + if (spu.blend.blend_enable) + blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors); #endif - if (spu_extract(mask, 0)) - spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle); - if (spu_extract(mask, 1)) - spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle); - if (spu_extract(mask, 2)) - spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle); - if (spu_extract(mask, 3)) - spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle); - } + if (spu_extract(mask, 0)) + spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle); + if (spu_extract(mask, 1)) + spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle); + if (spu_extract(mask, 2)) + spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle); + if (spu_extract(mask, 3)) + spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle); #if 0 /* SIMD_Z with swizzled color buffer (someday) */ -- cgit v1.2.3 From 1a75464cdc12a1e83f1452707cd624c53f808308 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 16:48:00 -0700 Subject: Cell: fix small sampling error in sample_texture_bilinear() --- src/mesa/pipe/cell/spu/spu_texture.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 
'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index 9ee2b45e24..01ff33a857 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -150,6 +150,8 @@ sample_texture_bilinear(vector float texcoord) static const vector unsigned int offset01 = {0, 1, 0, 0}; vector float tc = spu_mul(texcoord, spu.tex_size); + tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ + /* integer texcoords S,T: */ vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ vector unsigned int itc01 = spu_add(itc00, offset01); -- cgit v1.2.3 From 684d320ea2e7ec03d01275a544068cc6b45e1e9a Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 18:03:05 -0700 Subject: Cell: don't use VEC_LITERAL macro, doesn't work w/ SDK 3.0 --- src/mesa/pipe/cell/spu/spu_texture.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_texture.c') diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c index 01ff33a857..3962aaa4a9 100644 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -26,8 +26,6 @@ **************************************************************************/ -#include <vec_literal.h> - #include "pipe/p_compiler.h" #include "spu_main.h" #include "spu_texture.h" @@ -57,7 +55,7 @@ invalidate_tex_cache(void) /* XXX memset? */ uint i; for (i = 0; i < CACHE_SIZE; i++) { - tex_tile_xy[i] = VEC_LITERAL(vector unsigned int, ~0U, ~0U, ~0U, ~0U); + tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U }); } } -- cgit v1.2.3