From 217d37940771dd02ff1aa365105eca2c7a09d623 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 2 Apr 2008 14:01:42 -0600 Subject: cell: minor texture improvements Precompute tiles_per_row. Use ushort multiplies in a few places. New comments. --- src/gallium/drivers/cell/spu/spu_main.c | 2 ++ src/gallium/drivers/cell/spu/spu_main.h | 3 ++- src/gallium/drivers/cell/spu/spu_texture.c | 32 ++++++++++++++++++++---------- 3 files changed, 26 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 1ab1c40379..e04ffeb9b1 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -343,6 +343,8 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].width = width; spu.texture[unit].height = height; + spu.texture[unit].tiles_per_row = width / TILE_SIZE; + spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; spu.texture[unit].tex_size_mask = (vector unsigned int) { width - 1, height - 1, 0, 0 }; diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index e9e39cbeab..e962e1426c 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -105,7 +105,8 @@ struct spu_framebuffer { struct spu_texture { void *start; - uint width, height; + ushort width, height; + ushort tiles_per_row; vector float tex_size; vector unsigned int tex_size_mask; /**< == int(size - 1) */ vector unsigned int tex_size_x_mask; /**< == int(size - 1) */ diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index ceab246980..e9a2754e57 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -51,22 +51,25 @@ invalidate_tex_cache(void) static uint get_texel(uint unit, vec_uint4 coordinate) { + const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; + ushort x = spu_extract(coordinate, 0); + ushort y = spu_extract(coordinate, 1); + unsigned tile_offset = sizeof(tile_t) + * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE)); + ushort texel_offset = (ushort) 4 + * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE)); vec_uint4 tmp; - unsigned x = spu_extract(coordinate, 0); - unsigned y = spu_extract(coordinate, 1); - const unsigned tiles_per_row = spu.texture[unit].width / TILE_SIZE; - unsigned tile_offset = sizeof(tile_t) * ((y / TILE_SIZE * tiles_per_row) - + (x / TILE_SIZE)); - unsigned texel_offset = 4 * (((y % TILE_SIZE) * TILE_SIZE) - + (x % TILE_SIZE)); spu_dcache_fetch_unaligned((qword *) & tmp, - spu.texture[unit].start + tile_offset + texel_offset, + texture_ea + tile_offset + texel_offset, 4); return spu_extract(tmp, 0); } +/** + * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... + */ static void get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) { @@ -76,7 +79,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) const qword offset_x = si_andi((qword) x, 0x1f); const qword offset_y = si_andi((qword) y, 0x1f); - const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].width / TILE_SIZE); + const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); const qword tile_size = (qword) spu_splats(sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); @@ -97,6 +100,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) texture_ea + spu_extract(offset, 3), 4); } + /** * Get texture sample at texcoord. * XXX this is extremely primitive for now. @@ -126,17 +130,25 @@ sample_texture_bilinear(uint unit, vector float texcoord) vec_uint4 texels[4]; + /* setup texcoords for quad: + * +-----+-----+ + * |x0,y0|x1,y1| + * +-----+-----+ + * |x2,y2|x3,y3| + * +-----+-----+ + */ vec_uint4 x = spu_splats(spu_extract(itc, 0)); vec_uint4 y = spu_splats(spu_extract(itc, 1)); - x = spu_add(x, offset_x); y = spu_add(y, offset_y); + /* GL_REPEAT wrap mode: */ x = spu_and(x, spu.texture[unit].tex_size_x_mask); y = spu_and(y, spu.texture[unit].tex_size_y_mask); get_four_texels(unit, x, y, texels); + /* integer A8R8G8B8 to float texel conversion */ vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0)); vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0)); vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0)); -- cgit v1.2.3 From a7504ad587ee8cbfa9958ad23321a691ce0823d3 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 2 Apr 2008 14:30:28 -0600 Subject: cell: added some comments/ideas about better texture sampling --- src/gallium/drivers/cell/spu/spu_texture.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index e9a2754e57..5051774f00 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -48,9 +48,16 @@ invalidate_tex_cache(void) } +/** + * XXX look into getting texels for all four pixels in a quad at once. + */ static uint get_texel(uint unit, vec_uint4 coordinate) { + /* + * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as + * SIMD since X and Y are already in a SIMD register. + */ const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; ushort x = spu_extract(coordinate, 0); ushort y = spu_extract(coordinate, 1); @@ -69,6 +76,16 @@ get_texel(uint unit, vec_uint4 coordinate) /** * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... + * + * NOTE: in the typical case of bilinear filtering, the four texels + * are in a 2x2 group so we could get by with just two dcache fetches + * (two side-by-side texels per fetch). But when bilinear filtering + * wraps around a texture edge, we'll probably need code like we have + * now. + * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, + * it's quite likely that the four pixels in a quad will need some of the + * same texels. So look into doing texture fetches for four pixels at + * a time. */ static void get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) @@ -103,7 +120,6 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) /** * Get texture sample at texcoord. - * XXX this is extremely primitive for now. */ vector float sample_texture_nearest(uint unit, vector float texcoord) -- cgit v1.2.3