From 85dc1aec9c5fc63a01bb8db07215b84790d15d8f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 14 Oct 2008 15:19:01 -0600 Subject: cell: support NPOT textures, clamp/repeat mode, normalized/unorm texcoords glDrawPixels works now. --- src/gallium/drivers/cell/spu/spu_command.c | 48 +++++++++++++-- src/gallium/drivers/cell/spu/spu_main.h | 12 ++-- src/gallium/drivers/cell/spu/spu_texture.c | 99 ++++++++++++++++++++---------- 3 files changed, 117 insertions(+), 42 deletions(-) diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index fa78377c66..b1efe97e76 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -295,6 +295,42 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) } +/** + * Tex texture mask_s/t and scale_s/t fields depend on the texture size and + * sampler wrap modes. + */ +static void +update_tex_masks(struct spu_texture *texture, + const struct pipe_sampler_state *sampler) +{ + uint i; + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + int width = texture->level[i].width; + int height = texture->level[i].height; + + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_s = spu_splats(width - 1); + else + texture->level[i].mask_s = spu_splats(~0); + + if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_t = spu_splats(height - 1); + else + texture->level[i].mask_t = spu_splats(~0); + + if (sampler->normalized_coords) { + texture->level[i].scale_s = spu_splats((float) width); + texture->level[i].scale_t = spu_splats((float) height); + } + else { + texture->level[i].scale_s = spu_splats(1.0f); + texture->level[i].scale_t = spu_splats(1.0f); + } + } +} + + static void cmd_state_sampler(const struct cell_command_sampler *sampler) { @@ -341,6 +377,8 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) default: ASSERT(0); } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); } @@ -370,15 +408,15 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].level[i].tiles_per_row = (width + TILE_SIZE - 1) / TILE_SIZE; - spu.texture[unit].level[i].width4 = spu_splats((float) width); - spu.texture[unit].level[i].height4 = spu_splats((float) height); - - spu.texture[unit].level[i].tex_size_x_mask = spu_splats(width - 1); - spu.texture[unit].level[i].tex_size_y_mask = spu_splats(height - 1); + spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); + spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); if (texture->start[i]) spu.texture[unit].max_level = i; } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); + //Debug=0; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 56aac655e9..45c6f4ced1 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -107,17 +107,21 @@ struct spu_framebuffer } ALIGN16_ATTRIB; +/** per-texture level info */ struct spu_texture_level { void *start; ushort width, height; ushort tiles_per_row; - vector float width4; /**< == {width, width, width, width} */ - vector float height4; /**< == {height, height, height, height} */ - vector unsigned int tex_size_x_mask; /**< splat(width-1) */ - vector unsigned int tex_size_y_mask; /**< splat(height-1) */ + /** texcoord scale factors */ + vector float scale_s, scale_t; + /** texcoord masks (if REPEAT then size-1, else ~0) */ + vector signed int mask_s, mask_t; + /** texcoord clamp limits */ + vector signed int max_s, max_t; } ALIGN16_ATTRIB; + struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 83cf7dc394..b21c43a467 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -67,13 +67,13 @@ invalidate_tex_cache(void) * a time. */ static void -get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, +get_four_texels(uint unit, uint level, vec_int4 x, vec_int4 y, vec_uint4 *texels) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; const unsigned texture_ea = (uintptr_t) tlevel->start; - const vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ - const vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ + const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ + const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ @@ -99,6 +99,20 @@ get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, } +/** clamp vec to [0, max] */ +static INLINE vector signed int +spu_clamp(vector signed int vec, vector signed int max) +{ + static const vector signed int zero = {0,0,0,0}; + vector unsigned int c; + c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */ + vec = spu_sel(zero, vec, c); + c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */ + vec = spu_sel(vec, max, c); + return vec; +} + + /** * Do nearest texture sampling for four pixels. @@ -109,15 +123,20 @@ sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, uint unit, uint level, vector float colors[4]) { - vector float ss = spu_mul(s, spu.texture[unit].level[level].width4); - vector float tt = spu_mul(t, spu.texture[unit].level[level].height4); - vector unsigned int is = spu_convtu(ss, 0); - vector unsigned int it = spu_convtu(tt, 0); + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + vector float ss = spu_mul(s, tlevel->scale_s); + vector float tt = spu_mul(t, tlevel->scale_t); + vector signed int is = spu_convts(ss, 0); + vector signed int it = spu_convts(tt, 0); vec_uint4 texels[4]; /* PIPE_TEX_WRAP_REPEAT */ - is = spu_and(is, spu.texture[unit].level[level].tex_size_x_mask); - it = spu_and(it, spu.texture[unit].level[level].tex_size_y_mask); + is = spu_and(is, tlevel->mask_s); + it = spu_and(it, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is = spu_clamp(is, tlevel->max_s); + it = spu_clamp(it, tlevel->max_t); get_four_texels(unit, level, is, it, texels); @@ -135,21 +154,28 @@ sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, uint unit, uint level, vector float colors[4]) { - vector float ss = spu_madd(s, spu.texture[unit].level[level].width4, spu_splats(-0.5f)); - vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, spu_splats(-0.5f)); + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + vector float ss = spu_madd(s, tlevel->scale_s, spu_splats(-0.5f)); + vector float tt = spu_madd(t, tlevel->scale_t, spu_splats(-0.5f)); - vector unsigned int is0 = (vector unsigned int) spu_convts(ss, 0); - vector unsigned int it0 = (vector unsigned int) spu_convts(tt, 0); + vector signed int is0 = spu_convts(ss, 0); + vector signed int it0 = spu_convts(tt, 0); /* is + 1, it + 1 */ - vector unsigned int is1 = spu_add(is0, 1); - vector unsigned int it1 = spu_add(it0, 1); + vector signed int is1 = spu_add(is0, 1); + vector signed int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].level[level].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].level[level].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].level[level].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].level[level].tex_size_y_mask); + is0 = spu_and(is0, tlevel->mask_s); + it0 = spu_and(it0, tlevel->mask_t); + is1 = spu_and(is1, tlevel->mask_s); + it1 = spu_and(it1, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is0 = spu_clamp(is0, tlevel->max_s); + it0 = spu_clamp(it0, tlevel->max_t); + is1 = spu_clamp(is1, tlevel->max_s); + it1 = spu_clamp(it1, tlevel->max_t); /* get packed int texels */ vector unsigned int texels[16]; @@ -275,34 +301,41 @@ sample_texture4_bilinear_2(vector float s, vector float t, vector float r, vector float q, uint unit, uint level, vector float colors[4]) { + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; /* Scale texcoords by size of texture, and add half pixel bias */ - vector float ss = spu_madd(s, spu.texture[unit].level[level].width4, half); - vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, half); + vector float ss = spu_madd(s, tlevel->scale_s, half); + vector float tt = spu_madd(t, tlevel->scale_t, half); /* convert float coords to fixed-pt coords with 8 fraction bits */ - vector unsigned int is = (vector unsigned int) spu_convts(ss, 8); - vector unsigned int it = (vector unsigned int) spu_convts(tt, 8); + vector signed int is = spu_convts(ss, 8); + vector signed int it = spu_convts(tt, 8); /* compute integer texel weights in [0, 255] */ - vector signed int sWeights0 = spu_and((vector signed int) is, 255); - vector signed int tWeights0 = spu_and((vector signed int) it, 255); + vector signed int sWeights0 = spu_and(is, 255); + vector signed int tWeights0 = spu_and(it, 255); vector signed int sWeights1 = spu_sub(255, sWeights0); vector signed int tWeights1 = spu_sub(255, tWeights0); /* texel coords: is0 = is / 256, it0 = is / 256 */ - vector unsigned int is0 = spu_rlmask(is, -8); - vector unsigned int it0 = spu_rlmask(it, -8); + vector signed int is0 = spu_rlmask(is, -8); + vector signed int it0 = spu_rlmask(it, -8); /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */ - vector unsigned int is1 = spu_add(is0, 1); - vector unsigned int it1 = spu_add(it0, 1); + vector signed int is1 = spu_add(is0, 1); + vector signed int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].level[level].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].level[level].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].level[level].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].level[level].tex_size_y_mask); + is0 = spu_and(is0, tlevel->mask_s); + it0 = spu_and(it0, tlevel->mask_t); + is1 = spu_and(is1, tlevel->mask_s); + it1 = spu_and(it1, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is0 = spu_clamp(is0, tlevel->max_s); + it0 = spu_clamp(it0, tlevel->max_t); + is1 = spu_clamp(is1, tlevel->max_s); + it1 = spu_clamp(it1, tlevel->max_t); /* get packed int texels */ vector unsigned int texels[16]; -- cgit v1.2.3