From 164fb1299e1614ce05ae539d832567469eedb402 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 26 Sep 2008 09:38:40 -0600 Subject: cell: checkpoint: support for function calls in SPU shaders Will be used for instructions like SIN/COS/POW/TEX/etc. The PPU needs to know the address of some functions in the SPU address space. Send that info to the PPU/main memory rather than patch up shaders on the SPU side. Not finished/tested yet... --- src/gallium/drivers/cell/spu/spu_funcs.c | 106 +++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 src/gallium/drivers/cell/spu/spu_funcs.c (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c new file mode 100644 index 0000000000..d174956518 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * SPU functions accessed by shaders. + * + * Authors: Brian Paul + */ + + +#include +#include +#include +#include + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_funcs.h" + + +#define M_PI 3.1415926 + + +static vector float +spu_cos(vector float x) +{ + static const float scale = 1.0 / (2.0 * M_PI); + x = x * spu_splats(scale); /* normalize */ + return _cos8_v(x); +} + +static vector float +spu_sin(vector float x) +{ + static const float scale = 1.0 / (2.0 * M_PI); + x = x * spu_splats(scale); /* normalize */ + return _sin8_v(x); /* 8-bit accuracy enough?? */ +} + + +static void +add_func(struct cell_spu_function_info *spu_functions, + const char *name, void *addr) +{ + uint n = spu_functions->num; + ASSERT(strlen(name) < 16); + strcpy(spu_functions->names[n], name); + spu_functions->addrs[n] = (uint) addr; + spu_functions->num++; +} + + +/** + * Return info about the SPU's function to the PPU / main memory. + * The PPU needs to know the address of some SPU-side functions so + * that we can generate shader code with function calls. 
+ */ +void +return_function_info(void) +{ + struct cell_spu_function_info funcs ALIGN16_ATTRIB; + int tag = TAG_MISC; + + ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ + + funcs.num = 0; + add_func(&funcs, "spu_cos", &spu_cos); + add_func(&funcs, "spu_sin", &spu_sin); + + /* Send the function info back to the PPU / main memory */ + mfc_put((void *) &funcs, /* src in local store */ + (unsigned int) spu.init.spu_functions, /* dst in main memory */ + sizeof(funcs), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << tag); +} + + + -- cgit v1.2.3 From 55b65d3b42b8ba1ea1c5b5549b4629f3b20e7a97 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 26 Sep 2008 17:57:01 -0600 Subject: cell: stub-out sin/cos function bodies to avoid trashing caller's stack for now --- src/gallium/drivers/cell/spu/spu_funcs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index d174956518..b57ad3f3b8 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -49,17 +49,27 @@ static vector float spu_cos(vector float x) { +#if 0 static const float scale = 1.0 / (2.0 * M_PI); x = x * spu_splats(scale); /* normalize */ return _cos8_v(x); +#else + /* just pass-through to avoid trashing caller's stack */ + return x; +#endif } static vector float spu_sin(vector float x) { +#if 0 static const float scale = 1.0 / (2.0 * M_PI); x = x * spu_splats(scale); /* normalize */ return _sin8_v(x); /* 8-bit accuracy enough?? 
*/ +#else + /* just pass-through to avoid trashing caller's stack */ + return x; +#endif } -- cgit v1.2.3 From a4e477433f485a39b5de448d0a9cb6f4bf9bb90f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 8 Oct 2008 20:34:35 -0600 Subject: cell: implement more built-in shader functions, link spu code with -lm --- configs/linux-cell | 2 +- src/gallium/drivers/cell/spu/spu_funcs.c | 65 +++++++++++++++++++++----------- 2 files changed, 45 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/configs/linux-cell b/configs/linux-cell index 86651b83d7..8d74ee469d 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -53,7 +53,7 @@ SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main \ -DSPU_MAIN_PARAM_LONG_LONG \ -include spu_intrinsics.h -SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc +SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc -lm SPU_AR = ppu-ar SPU_AR_FLAGS = -qcs diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index b57ad3f3b8..1adf9de0e8 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -35,41 +35,61 @@ #include #include -#include -#include +#include +#include +#include #include "cell/common.h" #include "spu_main.h" #include "spu_funcs.h" -#define M_PI 3.1415926 - - static vector float spu_cos(vector float x) { -#if 0 - static const float scale = 1.0 / (2.0 * M_PI); - x = x * spu_splats(scale); /* normalize */ - return _cos8_v(x); -#else - /* just pass-through to avoid trashing caller's stack */ - return x; -#endif + return _cos14_v(x); } static vector float spu_sin(vector float x) { -#if 0 - static const float scale = 1.0 / (2.0 * M_PI); - x = x * spu_splats(scale); /* normalize */ - return _sin8_v(x); /* 8-bit accuracy enough?? 
*/ -#else - /* just pass-through to avoid trashing caller's stack */ - return x; -#endif + return _sin14_v(x); +} + +static vector float +spu_pow(vector float x, vector float y) +{ + float z0 = powf(spu_extract(x,0), spu_extract(y,0)); + float z1 = powf(spu_extract(x,1), spu_extract(y,1)); + float z2 = powf(spu_extract(x,2), spu_extract(y,2)); + float z3 = powf(spu_extract(x,3), spu_extract(y,3)); + return (vector float) {z0, z1, z2, z3}; +} + +static vector float +spu_exp2(vector float x) +{ + float z0 = powf(2.0f, spu_extract(x,0)); + float z1 = powf(2.0f, spu_extract(x,1)); + float z2 = powf(2.0f, spu_extract(x,2)); + float z3 = powf(2.0f, spu_extract(x,3)); + return (vector float) {z0, z1, z2, z3}; +} + +static vector float +spu_log2(vector float x) +{ + /* + * log_base_2(x) = log(x) / log(2) + * 1.442695 = 1/log(2). + */ + static const vector float k = {1.442695F, 1.442695F, 1.442695F, 1.442695F}; + float z0 = logf(spu_extract(x,0)); + float z1 = logf(spu_extract(x,1)); + float z2 = logf(spu_extract(x,2)); + float z3 = logf(spu_extract(x,3)); + vector float v = (vector float) {z0, z1, z2, z3}; + return spu_mul(v, k); } @@ -101,6 +121,9 @@ return_function_info(void) funcs.num = 0; add_func(&funcs, "spu_cos", &spu_cos); add_func(&funcs, "spu_sin", &spu_sin); + add_func(&funcs, "spu_pow", &spu_pow); + add_func(&funcs, "spu_exp2", &spu_exp2); + add_func(&funcs, "spu_log2", &spu_log2); /* Send the function info back to the PPU / main memory */ mfc_put((void *) &funcs, /* src in local store */ -- cgit v1.2.3 From 583098e3cb602fd9810a7c65718155fd9b0b3fda Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 9 Oct 2008 19:48:53 -0600 Subject: cell: implement basic TXP instruction in fragment shaders Lots of restrictions for now (one 2D texture, no mipmaps, etc.) for now but basic texture demos work. TEX, TXD, TXP do the same thing for the time being. 
--- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 109 ++++++++++++++++++++++++----- src/gallium/drivers/cell/spu/spu_funcs.c | 51 ++++++++++++-- src/gallium/drivers/cell/spu/spu_tri.c | 2 +- 3 files changed, 138 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 5647bb23e6..c8125a8a05 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -226,6 +226,11 @@ get_src_reg(struct codegen *gen, spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); } break; + case TGSI_FILE_SAMPLER: + { + reg = 3; /* XXX total hack */ + } + break; default: assert(0); } @@ -1162,6 +1167,21 @@ print_functions(struct cell_context *cell) #endif +static uint +lookup_function(struct cell_context *cell, const char *funcname) +{ + const struct cell_spu_function_info *funcs = &cell->spu_functions; + uint i, addr = 0; + for (i = 0; i < funcs->num; i++) { + if (strcmp(funcs->names[i], funcname) == 0) { + addr = funcs->addrs[i]; + } + } + assert(addr && "spu function not found"); + return addr / 4; /* discard 2 least significant bits */ +} + + /** * Emit code to call a SPU function. * Used to implement instructions like SIN/COS/POW/TEX/etc. 
@@ -1171,27 +1191,12 @@ emit_function_call(struct codegen *gen, const struct tgsi_full_instruction *inst, char *funcname, uint num_args) { - const struct cell_spu_function_info *funcs = &gen->cell->spu_functions; + const uint addr = lookup_function(gen->cell, funcname); char comment[100]; - uint addr; int ch; assert(num_args <= 3); - /* lookup function address */ - { - uint i; - addr = 0; - for (i = 0; i < funcs->num; i++) { - if (strcmp(funcs->names[i], funcname) == 0) { - addr = funcs->addrs[i]; - } - } - assert(addr && "spu function not found"); - } - - addr /= 4; /* discard 2 least significant bits */ - snprintf(comment, sizeof(comment), "CALL %s:", funcname); spe_comment(gen->f, -4, comment); @@ -1245,6 +1250,72 @@ emit_function_call(struct codegen *gen, } +static boolean +emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const uint addr = lookup_function(gen->cell, "spu_txp"); + int ch; + int coord_regs[4], d_regs[4]; + + spe_comment(gen->f, -4, "CALL txp:"); + + /* get src/dst reg info */ + for (ch = 0; ch < 4; ch++) { + coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + { + ubyte usedRegs[SPE_NUM_REGS]; + uint i, numUsed; + + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 32); + + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + + /* setup function arguments */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, 3 + i, coord_regs[i]); + } + + /* branch to function, save return addr */ + spe_brasl(gen->f, SPE_REG_RA, addr); + + /* save function's return values (four pixel's colors) */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, d_regs[i], 3 + i); + } + + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_regs[0] && + reg != 
d_regs[1] && + reg != d_regs[2] && + reg != d_regs[3]) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + } + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return TRUE; +} + + /** * Emit max. See emit_SGT for comments. */ @@ -1483,6 +1554,12 @@ emit_instruction(struct codegen *gen, return emit_function_call(gen, inst, "spu_exp2", 1); case TGSI_OPCODE_LOGBASE2: return emit_function_call(gen, inst, "spu_log2", 1); + case TGSI_OPCODE_TEX: + /* fall-through for now */ + case TGSI_OPCODE_TXD: + /* fall-through for now */ + case TGSI_OPCODE_TXP: + return emit_TXP(gen, inst); case TGSI_OPCODE_IF: return emit_IF(gen, inst); diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 1adf9de0e8..c7bcb3de9d 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -38,12 +38,20 @@ #include #include #include +#include #include "cell/common.h" #include "spu_main.h" #include "spu_funcs.h" +/** For "return"-ing four vectors */ +struct vec_4x4 +{ + vector float v[4]; +}; + + static vector float spu_cos(vector float x) { @@ -92,16 +100,44 @@ spu_log2(vector float x) return spu_mul(v, k); } +static struct vec_4x4 +spu_txp(vector float s, vector float t, vector float r, vector float q) +{ + const uint unit = 0; + struct vec_4x4 colors; + vector float coords[4]; + + coords[0] = s; + coords[1] = t; + coords[2] = r; + coords[3] = q; + _transpose_matrix4x4(coords, coords); + + /* get four texture samples */ + colors.v[0] = spu.sample_texture[unit](unit, coords[0]); + colors.v[1] = spu.sample_texture[unit](unit, coords[1]); + colors.v[2] = spu.sample_texture[unit](unit, coords[2]); + colors.v[3] = spu.sample_texture[unit](unit, coords[3]); + + _transpose_matrix4x4(colors.v, colors.v); + return colors; +} + +/** + * Add 
named function to list of "exported" functions that will be + * made available to the PPU-hosted code generator. + */ static void -add_func(struct cell_spu_function_info *spu_functions, - const char *name, void *addr) +export_func(struct cell_spu_function_info *spu_functions, + const char *name, void *addr) { uint n = spu_functions->num; ASSERT(strlen(name) < 16); strcpy(spu_functions->names[n], name); spu_functions->addrs[n] = (uint) addr; spu_functions->num++; + ASSERT(spu_functions->num <= 16); } @@ -119,11 +155,12 @@ return_function_info(void) ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ funcs.num = 0; - add_func(&funcs, "spu_cos", &spu_cos); - add_func(&funcs, "spu_sin", &spu_sin); - add_func(&funcs, "spu_pow", &spu_pow); - add_func(&funcs, "spu_exp2", &spu_exp2); - add_func(&funcs, "spu_log2", &spu_log2); + export_func(&funcs, "spu_cos", &spu_cos); + export_func(&funcs, "spu_sin", &spu_sin); + export_func(&funcs, "spu_pow", &spu_pow); + export_func(&funcs, "spu_exp2", &spu_exp2); + export_func(&funcs, "spu_log2", &spu_log2); + export_func(&funcs, "spu_txp", &spu_txp); /* Send the function info back to the PPU / main memory */ mfc_put((void *) &funcs, /* src in local store */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 6039cd80b2..87991c3136 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -286,7 +286,7 @@ emit_quad( int x, int y, mask_t mask) spu.cur_ctile_status = TILE_STATUS_DIRTY; spu.cur_ztile_status = TILE_STATUS_DIRTY; - if (spu.texture[0].start) { + if (0/*spu.texture[0].start*/) { /* * Temporary texture mapping path * This will go away when fragment programs support TEX inst. -- cgit v1.2.3 From 01e312a73b68dc5ddffca0d1b1472fc5dcb6f59e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 10 Oct 2008 16:36:40 -0600 Subject: cell: pass texture unit (sampler number) to txp() function The glsl/multitex demo runs now. 
--- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 4 ++++ src/gallium/drivers/cell/spu/spu_funcs.c | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 3d0e7976df..ef84059d8f 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1285,9 +1285,12 @@ static boolean emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) { const uint addr = lookup_function(gen->cell, "spu_txp"); + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; int ch; int coord_regs[4], d_regs[4]; + assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); + spe_comment(gen->f, -4, "CALL txp:"); /* get src/dst reg info */ @@ -1314,6 +1317,7 @@ emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) for (i = 0; i < 4; i++) { spe_move(gen->f, 3 + i, coord_regs[i]); } + spe_load_uint(gen->f, 7, unit); /* sampler unit */ /* branch to function, save return addr */ spe_brasl(gen->f, SPE_REG_RA, addr); diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index c7bcb3de9d..7dd7fcd253 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -101,9 +101,10 @@ spu_log2(vector float x) } static struct vec_4x4 -spu_txp(vector float s, vector float t, vector float r, vector float q) +spu_txp(vector float s, vector float t, vector float r, vector float q, + unsigned unit) { - const uint unit = 0; + //const uint unit = 0; struct vec_4x4 colors; vector float coords[4]; -- cgit v1.2.3 From 3b07c28dee74c7aa3be5efac8084d610675af291 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 13 Oct 2008 10:55:08 -0600 Subject: cell: do texture sampling/filtering for four pixels at a time. 
--- src/gallium/drivers/cell/spu/spu_command.c | 11 ++- src/gallium/drivers/cell/spu/spu_funcs.c | 4 + src/gallium/drivers/cell/spu/spu_main.h | 19 ++++- src/gallium/drivers/cell/spu/spu_texture.c | 125 ++++++++++++++++++++++++++++- src/gallium/drivers/cell/spu/spu_texture.h | 12 +++ 5 files changed, 161 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 91a4c137e7..c59be7defd 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -301,10 +301,14 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); spu.sampler[sampler->unit] = sampler->state; - if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) + if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { spu.sample_texture[sampler->unit] = sample_texture_bilinear; - else + spu.sample_texture4[sampler->unit] = sample_texture4_bilinear; + } + else { spu.sample_texture[sampler->unit] = sample_texture_nearest; + spu.sample_texture4[sampler->unit] = sample_texture4_nearest; + } } @@ -323,6 +327,9 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].width = width; spu.texture[unit].height = height; + spu.texture[unit].width4 = spu_splats((float) width); + spu.texture[unit].height4 = spu_splats((float) height); + spu.texture[unit].tiles_per_row = width / TILE_SIZE; spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 7dd7fcd253..13c234ea2e 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -106,6 +106,7 @@ spu_txp(vector float s, vector float t, vector float r, vector float q, { //const uint unit = 0; struct vec_4x4 colors; +#if 0 
vector float coords[4]; coords[0] = s; @@ -121,6 +122,9 @@ spu_txp(vector float s, vector float t, vector float r, vector float q, colors.v[3] = spu.sample_texture[unit](unit, coords[3]); _transpose_matrix4x4(colors.v, colors.v); +#else + spu.sample_texture4[unit](s, t, r, q, unit, colors.v); +#endif return colors; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 82c9c69a3a..5d14be51c2 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -67,6 +67,14 @@ typedef union { typedef vector float (*spu_sample_texture_func)(uint unit, vector float texcoord); +typedef void (*spu_sample_texture4_func)(vector float s, + vector float t, + vector float r, + vector float q, + uint unit, + vector float colors[4]); + + /** Function for performing per-fragment ops */ typedef void (*spu_fragment_ops_func)(uint x, uint y, tile_t *colorTile, @@ -107,10 +115,12 @@ struct spu_texture void *start; ushort width, height; ushort tiles_per_row; - vector float tex_size; - vector unsigned int tex_size_mask; /**< == int(size - 1) */ - vector unsigned int tex_size_x_mask; /**< == int(size - 1) */ - vector unsigned int tex_size_y_mask; /**< == int(size - 1) */ + vector float tex_size; /**< == {width, height, 0, 0} */ + vector float width4; /**< == {width, width, width, width} */ + vector float height4; /**< == {height, height, height, height} */ + vector unsigned int tex_size_mask; /**< == {width-1, height-1, 0, 0 } */ + vector unsigned int tex_size_x_mask; /**< splat(width-1) */ + vector unsigned int tex_size_y_mask; /**< splat(height-1) */ } ALIGN16_ATTRIB; @@ -159,6 +169,7 @@ struct spu_global /** Current texture sampler function */ spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; + spu_sample_texture4_func sample_texture4[CELL_MAX_SAMPLERS]; /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c 
b/src/gallium/drivers/cell/spu/spu_texture.c index 117b8a36f8..12e6ed1ba1 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -26,6 +26,8 @@ **************************************************************************/ +#include + #include "pipe/p_compiler.h" #include "spu_main.h" #include "spu_texture.h" @@ -91,10 +93,10 @@ static void get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) { const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; - vec_uint4 tile_x = spu_rlmask(x, -5); - vec_uint4 tile_y = spu_rlmask(y, -5); - const qword offset_x = si_andi((qword) x, 0x1f); - const qword offset_y = si_andi((qword) y, 0x1f); + vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ + vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ + const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ + const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); @@ -132,6 +134,31 @@ sample_texture_nearest(uint unit, vector float texcoord) } +/** + * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
+ */ +void +sample_texture4_nearest(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]) +{ + vector float ss = spu_mul(s, spu.texture[unit].width4); + vector float tt = spu_mul(t, spu.texture[unit].height4); + vector unsigned int is = spu_convtu(ss, 0); + vector unsigned int it = spu_convtu(tt, 0); + vec_uint4 texels[4]; + + /* GL_REPEAT wrap mode: */ + is = spu_and(is, spu.texture[unit].tex_size_x_mask); + it = spu_and(it, spu.texture[unit].tex_size_y_mask); + + get_four_texels(unit, is, it, texels); + + /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ + spu_unpack_A8R8G8B8_transpose4(texels, colors); +} + + vector float sample_texture_bilinear(uint unit, vector float texcoord) { @@ -198,3 +225,93 @@ sample_texture_bilinear(uint unit, vector float texcoord) return texel_sum; } + + +void +sample_texture4_bilinear(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]) +{ + vector float ss = spu_madd(s, spu.texture[unit].width4, spu_splats(-0.5f)); + vector float tt = spu_madd(t, spu.texture[unit].height4, spu_splats(-0.5f)); + + vector unsigned int is0 = spu_convtu(ss, 0); + vector unsigned int it0 = spu_convtu(tt, 0); + + /* is + 1, it + 1 */ + vector unsigned int is1 = spu_add(is0, 1); + vector unsigned int it1 = spu_add(it0, 1); + + /* PIPE_TEX_WRAP_REPEAT */ + is0 = spu_and(is0, spu.texture[unit].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].tex_size_y_mask); + + /* get packed int texels */ + vector unsigned int texels[16]; + get_four_texels(unit, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, is1, it1, texels + 12); /* lower-right */ + + /* XXX possibly rework following code to 
compute the weighted sample + * colors with integer arithmetic for fewer int->float conversions. + */ + + /* convert packed int texels to float colors */ + vector float ftexels[16]; + spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0); + spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4); + spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8); + spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12); + + /* Compute weighting factors in [0,1] + * Multiply texcoord by 1024, AND with 1023, convert back to float. + */ + vector float ss1024 = spu_mul(ss, spu_splats(1024.0f)); + vector signed int iss1024 = spu_convts(ss1024, 0); + iss1024 = spu_and(iss1024, 1023); + vector float sWeights0 = spu_convtf(iss1024, 10); + + vector float tt1024 = spu_mul(tt, spu_splats(1024.0f)); + vector signed int itt1024 = spu_convts(tt1024, 0); + itt1024 = spu_and(itt1024, 1023); + vector float tWeights0 = spu_convtf(itt1024, 10); + + /* 1 - sWeight and 1 - tWeight */ + vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0); + vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0); + + /* reds, for four pixels */ + ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]), + spu_add(ftexels[8], ftexels[12])); + + /* greens, for four pixels */ + ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]), + spu_add(ftexels[9], ftexels[13])); + + /* blues, for four pixels */ + 
ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]), + spu_add(ftexels[10], ftexels[14])); + + /* alphas, for four pixels */ + ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]), + spu_add(ftexels[11], ftexels[15])); +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index f7c9738be8..f019e7d8ef 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -40,8 +40,20 @@ extern vector float sample_texture_nearest(uint unit, vector float texcoord); +extern void +sample_texture4_nearest(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]); + + extern vector float sample_texture_bilinear(uint unit, vector float texcoord); +extern void +sample_texture4_bilinear(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]); + + #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From c8fb3682619ea49c5fefdf8b88cdb95eac7478ff Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 13 Oct 2008 11:16:04 -0600 Subject: cell: remove old texture code --- src/gallium/drivers/cell/spu/spu_command.c | 2 - src/gallium/drivers/cell/spu/spu_funcs.c | 19 ------- src/gallium/drivers/cell/spu/spu_main.h | 4 -- src/gallium/drivers/cell/spu/spu_texture.c | 88 ++---------------------------- 
src/gallium/drivers/cell/spu/spu_texture.h | 8 --- src/gallium/drivers/cell/spu/spu_tri.c | 67 +---------------------- 6 files changed, 7 insertions(+), 181 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index c59be7defd..d4cc9a2146 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -302,11 +302,9 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) spu.sampler[sampler->unit] = sampler->state; if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { - spu.sample_texture[sampler->unit] = sample_texture_bilinear; spu.sample_texture4[sampler->unit] = sample_texture4_bilinear; } else { - spu.sample_texture[sampler->unit] = sample_texture_nearest; spu.sample_texture4[sampler->unit] = sample_texture4_nearest; } } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 13c234ea2e..4c90b701ee 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -104,27 +104,8 @@ static struct vec_4x4 spu_txp(vector float s, vector float t, vector float r, vector float q, unsigned unit) { - //const uint unit = 0; struct vec_4x4 colors; -#if 0 - vector float coords[4]; - - coords[0] = s; - coords[1] = t; - coords[2] = r; - coords[3] = q; - _transpose_matrix4x4(coords, coords); - - /* get four texture samples */ - colors.v[0] = spu.sample_texture[unit](unit, coords[0]); - colors.v[1] = spu.sample_texture[unit](unit, coords[1]); - colors.v[2] = spu.sample_texture[unit](unit, coords[2]); - colors.v[3] = spu.sample_texture[unit](unit, coords[3]); - - _transpose_matrix4x4(colors.v, colors.v); -#else spu.sample_texture4[unit](s, t, r, q, unit, colors.v); -#endif return colors; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 
5d14be51c2..2a8cb00f8d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -64,9 +64,6 @@ typedef union { /** Function for sampling textures */ -typedef vector float (*spu_sample_texture_func)(uint unit, - vector float texcoord); - typedef void (*spu_sample_texture4_func)(vector float s, vector float t, vector float r, @@ -168,7 +165,6 @@ struct spu_global spu_fragment_program_func fragment_program; /** Current texture sampler function */ - spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; spu_sample_texture4_func sample_texture4[CELL_MAX_SAMPLERS]; /** Fragment program constants */ diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 12e6ed1ba1..ba62ad27fd 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -120,21 +120,9 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) } -/** - * Get texture sample at texcoord. - */ -vector float -sample_texture_nearest(uint unit, vector float texcoord) -{ - vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); - vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ - itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */ - uint texel = get_texel(unit, itc); - return spu_unpack_A8R8G8B8(texel); -} - /** + * Do nearest texture sampling for four pixels. * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
*/ void @@ -148,7 +136,7 @@ sample_texture4_nearest(vector float s, vector float t, vector unsigned int it = spu_convtu(tt, 0); vec_uint4 texels[4]; - /* GL_REPEAT wrap mode: */ + /* PIPE_TEX_WRAP_REPEAT */ is = spu_and(is, spu.texture[unit].tex_size_x_mask); it = spu_and(it, spu.texture[unit].tex_size_y_mask); @@ -159,74 +147,10 @@ sample_texture4_nearest(vector float s, vector float t, } -vector float -sample_texture_bilinear(uint unit, vector float texcoord) -{ - static const vec_uint4 offset_x = {0, 0, 1, 1}; - static const vec_uint4 offset_y = {0, 1, 0, 1}; - - vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); - tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ - - /* integer texcoords S,T: */ - vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */ - - vec_uint4 texels[4]; - - /* setup texcoords for quad: - * +-----+-----+ - * |x0,y0|x1,y1| - * +-----+-----+ - * |x2,y2|x3,y3| - * +-----+-----+ - */ - vec_uint4 x = spu_splats(spu_extract(itc, 0)); - vec_uint4 y = spu_splats(spu_extract(itc, 1)); - x = spu_add(x, offset_x); - y = spu_add(y, offset_y); - - /* GL_REPEAT wrap mode: */ - x = spu_and(x, spu.texture[unit].tex_size_x_mask); - y = spu_and(y, spu.texture[unit].tex_size_y_mask); - - get_four_texels(unit, x, y, texels); - - /* integer A8R8G8B8 to float texel conversion */ - vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0)); - vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0)); - vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0)); - vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0)); - - - /* Compute weighting factors in [0,1] - * Multiply texcoord by 1024, AND with 1023, convert back to float. 
- */ - vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); - vector signed int itc1024 = spu_convts(tc1024, 0); - itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); - vector float weight = spu_convtf(itc1024, 10); - - /* smeared frac and 1-frac */ - vector float sfrac = spu_splats(spu_extract(weight, 0)); - vector float tfrac = spu_splats(spu_extract(weight, 1)); - vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); - vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); - - /* multiply the samples (colors) by the S/T weights */ - texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); - texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); - texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); - texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); - - /* compute sum of weighted samples */ - vector float texel_sum = spu_add(texel00, texel01); - texel_sum = spu_add(texel_sum, texel10); - texel_sum = spu_add(texel_sum, texel11); - - return texel_sum; -} - - +/** + * Do bilinear texture sampling for four pixels. + * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
+ */ void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index f019e7d8ef..d576aed719 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -36,20 +36,12 @@ extern void invalidate_tex_cache(void); -extern vector float -sample_texture_nearest(uint unit, vector float texcoord); - - extern void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, uint unit, vector float colors[4]); -extern vector float -sample_texture_bilinear(uint unit, vector float texcoord); - - extern void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index a62d4f0f2f..022d21ba8f 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -286,72 +286,7 @@ emit_quad( int x, int y, mask_t mask) spu.cur_ctile_status = TILE_STATUS_DIRTY; spu.cur_ztile_status = TILE_STATUS_DIRTY; - if (0/*spu.texture[0].start*/) { - /* - * Temporary texture mapping path - * This will go away when fragment programs support TEX inst. 
- */ - const uint unit = 0; - vector float colors[4]; - vector float texcoords[4]; - eval_coeff(2, (float) x, (float) y, texcoords); - - if (spu_extract(mask, 0)) - colors[0] = spu.sample_texture[unit](unit, texcoords[0]); - if (spu_extract(mask, 1)) - colors[1] = spu.sample_texture[unit](unit, texcoords[1]); - if (spu_extract(mask, 2)) - colors[2] = spu.sample_texture[unit](unit, texcoords[2]); - if (spu_extract(mask, 3)) - colors[3] = spu.sample_texture[unit](unit, texcoords[3]); - - - if (spu.texture[1].start) { - /* multi-texture mapping */ - const uint unit = 1; - vector float colors1[4]; - - eval_coeff(2, (float) x, (float) y, texcoords); - - if (spu_extract(mask, 0)) - colors1[0] = spu.sample_texture[unit](unit, texcoords[0]); - if (spu_extract(mask, 1)) - colors1[1] = spu.sample_texture[unit](unit, texcoords[1]); - if (spu_extract(mask, 2)) - colors1[2] = spu.sample_texture[unit](unit, texcoords[2]); - if (spu_extract(mask, 3)) - colors1[3] = spu.sample_texture[unit](unit, texcoords[3]); - - /* hack: modulate first texture by second */ - colors[0] = spu_mul(colors[0], colors1[0]); - colors[1] = spu_mul(colors[1], colors1[1]); - colors[2] = spu_mul(colors[2], colors1[2]); - colors[3] = spu_mul(colors[3], colors1[3]); - } - - { - /* Convert fragment data from AoS to SoA format. - * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) - * This is temporary! - */ - vector float soa_frag[4]; - _transpose_matrix4x4(soa_frag, colors); - - vector float fragZ = eval_z((float) x, (float) y); - - /* Do all per-fragment/quad operations here, including: - * alpha test, z test, stencil test, blend and framebuffer writing. - */ - spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, - fragZ, - soa_frag[0], soa_frag[1], - soa_frag[2], soa_frag[3], - mask, - setup.facing); - } - - } - else { + { /* * Run fragment shader, execute per-fragment ops, update fb/tile. 
*/ -- cgit v1.2.3 From 978799beb2a9c51550abb1f37bb6f63d06bc4717 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 13 Oct 2008 16:43:11 -0600 Subject: cell: initial work for mipmap texture filtering --- src/gallium/drivers/cell/common.h | 6 +- src/gallium/drivers/cell/ppu/cell_screen.c | 4 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 18 ++-- src/gallium/drivers/cell/ppu/cell_texture.c | 48 ++++++---- src/gallium/drivers/cell/ppu/cell_texture.h | 6 +- src/gallium/drivers/cell/spu/spu_command.c | 37 +++++--- src/gallium/drivers/cell/spu/spu_funcs.c | 1 + src/gallium/drivers/cell/spu/spu_main.h | 7 +- src/gallium/drivers/cell/spu/spu_texture.c | 120 ++++++++++++++++++------- src/gallium/drivers/cell/spu/spu_texture.h | 6 ++ 10 files changed, 176 insertions(+), 77 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 5dc756023f..e4de9a551d 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -67,6 +67,7 @@ #define CELL_MAX_SPUS 6 #define CELL_MAX_SAMPLERS 4 +#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ #define TILE_SIZE 32 @@ -251,8 +252,9 @@ struct cell_command_texture { uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */ uint unit; - void *start; /**< Address in main memory */ - ushort width, height; + void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ + ushort width[CELL_MAX_TEXTURE_LEVELS]; + ushort height[CELL_MAX_TEXTURE_LEVELS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 47ba6fa290..d223557950 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -76,11 +76,11 @@ cell_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 10; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; /* max 2Kx2K */ + return CELL_MAX_TEXTURE_LEVELS; case 
PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return 8; /* max 128x128x128 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; /* max 2Kx2K */ + return CELL_MAX_TEXTURE_LEVELS; default: return 10; } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index cbfa393cfb..7090b4c99f 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -211,14 +211,20 @@ cell_emit_state(struct cell_context *cell) texture->opcode = CELL_CMD_STATE_TEXTURE; texture->unit = i; if (cell->texture[i]) { - texture->start = cell->texture[i]->tiled_data; - texture->width = cell->texture[i]->base.width[0]; - texture->height = cell->texture[i]->base.height[0]; + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + texture->start[level] = cell->texture[i]->tiled_data[level]; + texture->width[level] = cell->texture[i]->base.width[level]; + texture->height[level] = cell->texture[i]->base.height[level]; + } } else { - texture->start = NULL; - texture->width = 1; - texture->height = 1; + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + texture->start[level] = NULL; + texture->width[level] = 1; + texture->height[level] = 1; + } } } } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index b6590dfb86..f5f81ac3cc 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -66,6 +66,8 @@ cell_texture_layout(struct cell_texture * spt) unsigned size; unsigned w_tile, h_tile; + assert(level < CELL_MAX_TEXTURE_LEVELS); + /* width, height, rounded up to tile size */ w_tile = align(width, TILE_SIZE); h_tile = align(height, TILE_SIZE); @@ -249,33 +251,41 @@ cell_tile_texture(struct cell_context *cell, struct cell_texture *texture) { struct pipe_screen *screen = cell->pipe.screen; - uint face = 0, level = 0, zslice = 0; - struct pipe_surface *surf; - const 
uint w = texture->base.width[0], h = texture->base.height[0]; + uint face = 0, level, zslice = 0; const uint *src; - /* temporary restrictions: */ - assert(w >= TILE_SIZE); - assert(h >= TILE_SIZE); - assert(w % TILE_SIZE == 0); - assert(h % TILE_SIZE == 0); + for (level = 0; level <= texture->base.last_level; level++) { + if (!texture->tiled_data[level]) { + struct pipe_surface *surf; - surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice, - PIPE_BUFFER_USAGE_CPU_WRITE); - ASSERT(surf); + const uint w = texture->base.width[level], h = texture->base.height[level]; - src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE); + if (w < 32 || h < 32) + continue; + /* temporary restrictions: */ + assert(w >= TILE_SIZE); + assert(h >= TILE_SIZE); + assert(w % TILE_SIZE == 0); + assert(h % TILE_SIZE == 0); - if (texture->tiled_data) { - align_free(texture->tiled_data); - } - texture->tiled_data = align_malloc(w * h * 4, 16); + surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); + ASSERT(surf); + + src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE); - tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src); + if (texture->tiled_data[level]) { + align_free(texture->tiled_data[level]); + } + texture->tiled_data[level] = align_malloc(w * h * 4, 16); - pipe_surface_unmap(surf); + tile_copy_data(w, h, TILE_SIZE, texture->tiled_data[level], src); - pipe_surface_reference(&surf, NULL); + pipe_surface_unmap(surf); + + pipe_surface_reference(&surf, NULL); + } + } } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 6d37e95ebc..6d35736984 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -40,15 +40,15 @@ struct cell_texture { struct pipe_texture base; - unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long 
stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; + unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; /* The data is held here: */ struct pipe_buffer *buffer; unsigned long buffer_size; - void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/ + void *tiled_data[CELL_MAX_TEXTURE_LEVELS]; /* XXX this may be temporary */ /*ALIGN16*/ }; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 64890f6dbd..089af22415 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -301,6 +301,12 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); spu.sampler[sampler->unit] = sampler->state; +#if 0 + if (spu.sampler[sampler->unit].min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + spu.sample_texture4[sampler->unit] = sample_texture4_lod; + } + else +#endif if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { spu.sample_texture4[sampler->unit] = sample_texture4_bilinear; } @@ -314,24 +320,29 @@ static void cmd_state_texture(const struct cell_command_texture *texture) { const uint unit = texture->unit; - const uint width = texture->width; - const uint height = texture->height; + uint i; - DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n", - texture->unit, texture->start, - texture->width, texture->height); + DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit); - spu.texture[unit].start = texture->start; - spu.texture[unit].width = width; - spu.texture[unit].height = height; + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + uint width = texture->width[i]; + uint height = texture->height[i]; - spu.texture[unit].width4 = spu_splats((float) width); - spu.texture[unit].height4 = spu_splats((float) height); + DEBUG_PRINTF(" LEVEL %u: at %p size[0] %u x %u\n", i, + texture->start[i], texture->width[i], texture->height[i]); - spu.texture[unit].tiles_per_row = width / TILE_SIZE; 
+ spu.texture[unit].level[i].start = texture->start[i]; + spu.texture[unit].level[i].width = width; + spu.texture[unit].level[i].height = height; - spu.texture[unit].tex_size_x_mask = spu_splats(width - 1); - spu.texture[unit].tex_size_y_mask = spu_splats(height - 1); + spu.texture[unit].level[i].tiles_per_row = width / TILE_SIZE; + + spu.texture[unit].level[i].width4 = spu_splats((float) width); + spu.texture[unit].level[i].height4 = spu_splats((float) height); + + spu.texture[unit].level[i].tex_size_x_mask = spu_splats(width - 1); + spu.texture[unit].level[i].tex_size_y_mask = spu_splats(height - 1); + } } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 4c90b701ee..f2946010bd 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -100,6 +100,7 @@ spu_log2(vector float x) return spu_mul(v, k); } + static struct vec_4x4 spu_txp(vector float s, vector float t, vector float r, vector float q, unsigned unit) diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index e3960dbe8b..9515543efe 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -107,7 +107,7 @@ struct spu_framebuffer } ALIGN16_ATTRIB; -struct spu_texture +struct spu_texture_level { void *start; ushort width, height; @@ -118,6 +118,11 @@ struct spu_texture vector unsigned int tex_size_y_mask; /**< splat(height-1) */ } ALIGN16_ATTRIB; +struct spu_texture +{ + struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; +} ALIGN16_ATTRIB; + /** * All SPU global/context state will be in a singleton object of this type: diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 96ef88822a..96c09e3ccb 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -27,6 +27,7 @@ #include +#include #include "pipe/p_compiler.h" 
#include "spu_main.h" @@ -42,11 +43,12 @@ void invalidate_tex_cache(void) { + uint lvl = 0; uint unit = 0; - uint bytes = 4 * spu.texture[unit].width - * spu.texture[unit].height; + uint bytes = 4 * spu.texture[unit].level[lvl].width + * spu.texture[unit].level[lvl].height; - spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes); + spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); } @@ -64,15 +66,17 @@ invalidate_tex_cache(void) * a time. */ static void -get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) +get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, + vec_uint4 *texels) { - const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + const unsigned texture_ea = (uintptr_t) tlevel->start; vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ - const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); + const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row); const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); @@ -104,17 +108,18 @@ sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, uint unit, vector float colors[4]) { - vector float ss = spu_mul(s, spu.texture[unit].width4); - vector float tt = spu_mul(t, spu.texture[unit].height4); + const uint lvl = 0; + vector float ss = spu_mul(s, spu.texture[unit].level[lvl].width4); + vector float tt = spu_mul(t, spu.texture[unit].level[lvl].height4); vector unsigned int is = spu_convtu(ss, 0); vector unsigned int it = spu_convtu(tt, 0); vec_uint4 texels[4]; /* PIPE_TEX_WRAP_REPEAT */ - is = 
spu_and(is, spu.texture[unit].tex_size_x_mask); - it = spu_and(it, spu.texture[unit].tex_size_y_mask); + is = spu_and(is, spu.texture[unit].level[lvl].tex_size_x_mask); + it = spu_and(it, spu.texture[unit].level[lvl].tex_size_y_mask); - get_four_texels(unit, is, it, texels); + get_four_texels(unit, lvl, is, it, texels); /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ spu_unpack_A8R8G8B8_transpose4(texels, colors); @@ -130,8 +135,9 @@ sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, uint unit, vector float colors[4]) { - vector float ss = spu_madd(s, spu.texture[unit].width4, spu_splats(-0.5f)); - vector float tt = spu_madd(t, spu.texture[unit].height4, spu_splats(-0.5f)); + const uint lvl = 0; + vector float ss = spu_madd(s, spu.texture[unit].level[lvl].width4, spu_splats(-0.5f)); + vector float tt = spu_madd(t, spu.texture[unit].level[lvl].height4, spu_splats(-0.5f)); vector unsigned int is0 = spu_convtu(ss, 0); vector unsigned int it0 = spu_convtu(tt, 0); @@ -141,17 +147,17 @@ sample_texture4_bilinear(vector float s, vector float t, vector unsigned int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].tex_size_y_mask); + is0 = spu_and(is0, spu.texture[unit].level[lvl].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].level[lvl].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].level[lvl].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].level[lvl].tex_size_y_mask); /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, is1, it1, texels + 12); /* lower-right */ 
+ get_four_texels(unit, lvl, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, lvl, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, lvl, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, lvl, is1, it1, texels + 12); /* lower-right */ /* XXX possibly rework following code to compute the weighted sample * colors with integer arithmetic for fewer int->float conversions. @@ -270,10 +276,11 @@ sample_texture4_bilinear_2(vector float s, vector float t, vector float r, vector float q, uint unit, vector float colors[4]) { + const uint lvl = 0; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; /* Scale texcoords by size of texture, and add half pixel bias */ - vector float ss = spu_madd(s, spu.texture[unit].width4, half); - vector float tt = spu_madd(t, spu.texture[unit].height4, half); + vector float ss = spu_madd(s, spu.texture[unit].level[lvl].width4, half); + vector float tt = spu_madd(t, spu.texture[unit].level[lvl].height4, half); /* convert float coords to fixed-pt coords with 8 fraction bits */ vector unsigned int is = spu_convtu(ss, 8); @@ -294,17 +301,17 @@ sample_texture4_bilinear_2(vector float s, vector float t, vector unsigned int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].tex_size_y_mask); + is0 = spu_and(is0, spu.texture[unit].level[lvl].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].level[lvl].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].level[lvl].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].level[lvl].tex_size_y_mask); /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, is0, it1, texels + 8); 
/* lower-left */ - get_four_texels(unit, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, lvl, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, lvl, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, lvl, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, lvl, is1, it1, texels + 12); /* lower-right */ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ { @@ -363,3 +370,54 @@ sample_texture4_bilinear_2(vector float s, vector float t, cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); colors[3] = spu_convtf(cSum, 24); } + + + +/** + * Compute level of detail factor from texcoords. + */ +static float +compute_lambda(uint unit, vector float s, vector float t) +{ + uint lvl = 0; + float width = spu.texture[unit].level[lvl].width; + float height = spu.texture[unit].level[lvl].width; + float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0)); + float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); + float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); + float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0)); + float x = dsdx * dsdx + dtdx * dtdx; + float y = dsdy * dsdy + dtdy * dtdy; + float rho = x > y ? x : y; + rho = sqrtf(rho); + float lambda = logf(rho) * 1.442695f; + return lambda; +} + + + +/** + * Texture sampling with level of detail selection. 
+ */ +void +sample_texture4_lod(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]) +{ + float lambda = compute_lambda(unit, s, t); + + if (lambda < spu.sampler[unit].min_lod) + lambda = spu.sampler[unit].min_lod; + else if (lambda > spu.sampler[unit].max_lod) + lambda = spu.sampler[unit].max_lod; + + /* hack for now */ + int level = (int) lambda; + if (level > 3) + level = 3; + + /* + sample_texture4_bilinear_2(s, t, r, q, unit, level, colors); + */ +} + diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index 38a17deda2..4802f7c47c 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -53,4 +53,10 @@ sample_texture4_bilinear_2(vector float s, vector float t, uint unit, vector float colors[4]); +extern void +sample_texture4_lod(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]); + + #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From f8bddf698d523f597fea0f721b064daee81d8005 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 14 Oct 2008 12:11:52 -0600 Subject: cell: basic mipmap filtering works now Though, only GL_MIPMAP_NEAREST / GL_LINEAR works right now. 
--- src/gallium/drivers/cell/spu/spu_command.c | 21 ++++-- src/gallium/drivers/cell/spu/spu_funcs.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 3 +- src/gallium/drivers/cell/spu/spu_texture.c | 106 +++++++++++++++-------------- src/gallium/drivers/cell/spu/spu_texture.h | 8 +-- 5 files changed, 79 insertions(+), 61 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 089af22415..4e98eea338 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -301,16 +301,18 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); spu.sampler[sampler->unit] = sampler->state; -#if 0 + if (spu.sampler[sampler->unit].min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + /* use lambda/lod to determine min vs. mag filter */ spu.sample_texture4[sampler->unit] = sample_texture4_lod; } - else -#endif - if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { + else if (spu.sampler[sampler->unit].min_img_filter + == PIPE_TEX_FILTER_LINEAR) { + /* min = mag = bilinear */ spu.sample_texture4[sampler->unit] = sample_texture4_bilinear; } else { + /* min = mag = nearest */ spu.sample_texture4[sampler->unit] = sample_texture4_nearest; } } @@ -322,8 +324,12 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint unit = texture->unit; uint i; + //if (spu.init.id==0) Debug=1; + DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit); + spu.texture[unit].max_level = 0; + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { uint width = texture->width[i]; uint height = texture->height[i]; @@ -335,14 +341,19 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].level[i].width = width; spu.texture[unit].level[i].height = height; - spu.texture[unit].level[i].tiles_per_row = width / TILE_SIZE; + 
spu.texture[unit].level[i].tiles_per_row = + (width + TILE_SIZE - 1) / TILE_SIZE; spu.texture[unit].level[i].width4 = spu_splats((float) width); spu.texture[unit].level[i].height4 = spu_splats((float) height); spu.texture[unit].level[i].tex_size_x_mask = spu_splats(width - 1); spu.texture[unit].level[i].tex_size_y_mask = spu_splats(height - 1); + + if (texture->start[i]) + spu.texture[unit].max_level = i; } + //Debug=0; } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index f2946010bd..66b82f673d 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -106,7 +106,7 @@ spu_txp(vector float s, vector float t, vector float r, vector float q, unsigned unit) { struct vec_4x4 colors; - spu.sample_texture4[unit](s, t, r, q, unit, colors.v); + spu.sample_texture4[unit](s, t, r, q, unit, 0, colors.v); return colors; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 9515543efe..cfb645add0 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -68,7 +68,7 @@ typedef void (*spu_sample_texture4_func)(vector float s, vector float t, vector float r, vector float q, - uint unit, + uint unit, uint level, vector float colors[4]); @@ -121,6 +121,7 @@ struct spu_texture_level struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; + uint max_level; } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 96c09e3ccb..10036330c6 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -26,7 +26,6 @@ **************************************************************************/ -#include #include #include "pipe/p_compiler.h" @@ -43,12 +42,14 @@ void invalidate_tex_cache(void) { - uint lvl = 0; - uint unit = 0; - uint bytes = 4 * 
spu.texture[unit].level[lvl].width - * spu.texture[unit].level[lvl].height; + uint lvl; + for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) { + uint unit = 0; + uint bytes = 4 * spu.texture[unit].level[lvl].width + * spu.texture[unit].level[lvl].height; - spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); + spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); + } } @@ -71,8 +72,8 @@ get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; const unsigned texture_ea = (uintptr_t) tlevel->start; - vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ - vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ + const vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ + const vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ @@ -106,20 +107,19 @@ get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]) + uint unit, uint level, vector float colors[4]) { - const uint lvl = 0; - vector float ss = spu_mul(s, spu.texture[unit].level[lvl].width4); - vector float tt = spu_mul(t, spu.texture[unit].level[lvl].height4); + vector float ss = spu_mul(s, spu.texture[unit].level[level].width4); + vector float tt = spu_mul(t, spu.texture[unit].level[level].height4); vector unsigned int is = spu_convtu(ss, 0); vector unsigned int it = spu_convtu(tt, 0); vec_uint4 texels[4]; /* PIPE_TEX_WRAP_REPEAT */ - is = spu_and(is, spu.texture[unit].level[lvl].tex_size_x_mask); - it = spu_and(it, spu.texture[unit].level[lvl].tex_size_y_mask); + is = spu_and(is, spu.texture[unit].level[level].tex_size_x_mask); + it = spu_and(it, 
spu.texture[unit].level[level].tex_size_y_mask); - get_four_texels(unit, lvl, is, it, texels); + get_four_texels(unit, level, is, it, texels); /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ spu_unpack_A8R8G8B8_transpose4(texels, colors); @@ -133,11 +133,10 @@ sample_texture4_nearest(vector float s, vector float t, void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]) + uint unit, uint level, vector float colors[4]) { - const uint lvl = 0; - vector float ss = spu_madd(s, spu.texture[unit].level[lvl].width4, spu_splats(-0.5f)); - vector float tt = spu_madd(t, spu.texture[unit].level[lvl].height4, spu_splats(-0.5f)); + vector float ss = spu_madd(s, spu.texture[unit].level[level].width4, spu_splats(-0.5f)); + vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, spu_splats(-0.5f)); vector unsigned int is0 = spu_convtu(ss, 0); vector unsigned int it0 = spu_convtu(tt, 0); @@ -147,17 +146,17 @@ sample_texture4_bilinear(vector float s, vector float t, vector unsigned int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].level[lvl].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].level[lvl].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].level[lvl].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].level[lvl].tex_size_y_mask); + is0 = spu_and(is0, spu.texture[unit].level[level].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].level[level].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].level[level].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].level[level].tex_size_y_mask); /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, lvl, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, lvl, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, lvl, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, lvl, is1, 
it1, texels + 12); /* lower-right */ + get_four_texels(unit, level, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, level, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, level, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, level, is1, it1, texels + 12); /* lower-right */ /* XXX possibly rework following code to compute the weighted sample * colors with integer arithmetic for fewer int->float conversions. @@ -273,14 +272,13 @@ transpose(vector unsigned int *mOut0, */ void sample_texture4_bilinear_2(vector float s, vector float t, - vector float r, vector float q, - uint unit, vector float colors[4]) + vector float r, vector float q, + uint unit, uint level, vector float colors[4]) { - const uint lvl = 0; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; /* Scale texcoords by size of texture, and add half pixel bias */ - vector float ss = spu_madd(s, spu.texture[unit].level[lvl].width4, half); - vector float tt = spu_madd(t, spu.texture[unit].level[lvl].height4, half); + vector float ss = spu_madd(s, spu.texture[unit].level[level].width4, half); + vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, half); /* convert float coords to fixed-pt coords with 8 fraction bits */ vector unsigned int is = spu_convtu(ss, 8); @@ -301,17 +299,17 @@ sample_texture4_bilinear_2(vector float s, vector float t, vector unsigned int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].level[lvl].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].level[lvl].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].level[lvl].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].level[lvl].tex_size_y_mask); + is0 = spu_and(is0, spu.texture[unit].level[level].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].level[level].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].level[level].tex_size_x_mask); + it1 = spu_and(it1, 
spu.texture[unit].level[level].tex_size_y_mask); /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, lvl, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, lvl, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, lvl, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, lvl, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, level, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, level, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, level, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, level, is1, it1, texels + 12); /* lower-right */ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ { @@ -379,9 +377,9 @@ sample_texture4_bilinear_2(vector float s, vector float t, static float compute_lambda(uint unit, vector float s, vector float t) { - uint lvl = 0; - float width = spu.texture[unit].level[lvl].width; - float height = spu.texture[unit].level[lvl].width; + uint baseLevel = 0; + float width = spu.texture[unit].level[baseLevel].width; + float height = spu.texture[unit].level[baseLevel].width; float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0)); float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); @@ -402,22 +400,30 @@ compute_lambda(uint unit, vector float s, vector float t) void sample_texture4_lod(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]) + uint unit, uint level, vector float colors[4]) { + /* + * Note that we're computing a lambda/lod here that's used for all + * four pixels in the quad. 
+ */ float lambda = compute_lambda(unit, s, t); + /* apply lod bias */ + lambda += spu.sampler[unit].lod_bias; + + /* clamp */ if (lambda < spu.sampler[unit].min_lod) lambda = spu.sampler[unit].min_lod; else if (lambda > spu.sampler[unit].max_lod) lambda = spu.sampler[unit].max_lod; - /* hack for now */ - int level = (int) lambda; - if (level > 3) - level = 3; + /* convert to int level */ + level = (int) (lambda + 0.5f); + ASSERT(level >= 0); + + if (level > spu.texture[unit].max_level) + level = spu.texture[unit].max_level; - /* sample_texture4_bilinear_2(s, t, r, q, unit, level, colors); - */ } diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index 4802f7c47c..ec06a50b4a 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -39,24 +39,24 @@ invalidate_tex_cache(void); extern void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]); + uint unit, uint level, vector float colors[4]); extern void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]); + uint unit, uint level, vector float colors[4]); extern void sample_texture4_bilinear_2(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]); + uint unit, uint level, vector float colors[4]); extern void sample_texture4_lod(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]); + uint unit, uint level, vector float colors[4]); #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From 8f7c6b55ae962e30f32cfec9a14a652d3b5b5943 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 14 Oct 2008 17:11:29 -0600 Subject: cell: support for cubemaps Though, progs/demos/cubemap.c doesn't quite work right... 
--- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 + src/gallium/drivers/cell/ppu/cell_texture.c | 37 ++++-- src/gallium/drivers/cell/spu/spu_command.c | 17 ++- src/gallium/drivers/cell/spu/spu_funcs.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 4 +- src/gallium/drivers/cell/spu/spu_texture.c | 171 ++++++++++++++++++++++--- src/gallium/drivers/cell/spu/spu_texture.h | 21 ++- 8 files changed, 214 insertions(+), 41 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index e4de9a551d..c1e78f4db3 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -251,6 +251,7 @@ struct cell_command_sampler struct cell_command_texture { uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */ + uint target; /**< PIPE_TEXTURE_x */ uint unit; void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ ushort width[CELL_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index cae546b700..d4a867ffcf 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -217,6 +217,7 @@ cell_emit_state(struct cell_context *cell) texture->width[level] = cell->texture[i]->base.width[level]; texture->height[level] = cell->texture[i]->base.height[level]; } + texture->target = cell->texture[i]->base.target; } else { uint level; @@ -225,6 +226,7 @@ cell_emit_state(struct cell_context *cell) texture->width[level] = 0; texture->height[level] = 0; } + texture->target = 0; } } } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 4fd66bdea0..4c92ef154f 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -137,6 +137,7 @@ cell_texture_release(struct pipe_screen *screen, */ if 
(--(*pt)->refcount <= 0) { struct cell_texture *ct = cell_texture(*pt); + uint i; /* DBG("%s deleting %p\n", __FUNCTION__, (void *) ct); @@ -144,6 +145,12 @@ cell_texture_release(struct pipe_screen *screen, pipe_buffer_reference(screen, &ct->buffer, NULL); + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + if (ct->tiled_data[i]) { + FREE(ct->tiled_data[i]); + } + } + FREE(ct); } *pt = NULL; @@ -204,27 +211,33 @@ static void cell_twiddle_texture(struct pipe_screen *screen, struct pipe_surface *surface) { - struct cell_texture *texture = cell_texture(surface->texture); + struct cell_texture *ct = cell_texture(surface->texture); const uint level = surface->level; - const uint texWidth = texture->base.width[level]; - const uint texHeight = texture->base.height[level]; + const uint texWidth = ct->base.width[level]; + const uint texHeight = ct->base.height[level]; const uint bufWidth = align(texWidth, TILE_SIZE); const uint bufHeight = align(texHeight, TILE_SIZE); const void *map = pipe_buffer_map(screen, surface->buffer, PIPE_BUFFER_USAGE_CPU_READ); const uint *src = (const uint *) ((const ubyte *) map + surface->offset); - switch (texture->base.format) { + switch (ct->base.format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - /* free old tiled data */ - if (texture->tiled_data[level]) { - align_free(texture->tiled_data[level]); + { + int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 
6 : 1; + int offset = bufWidth * bufHeight * 4 * surface->face; + uint *dst; + + if (!ct->tiled_data[level]) { + ct->tiled_data[level] = + align_malloc(bufWidth * bufHeight * 4 * numFaces, 16); + } + + dst = (uint *) ((ubyte *) ct->tiled_data[level] + offset); + + twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, + surface->stride, src); } - /* alloc new tiled data */ - texture->tiled_data[level] = align_malloc(bufWidth * bufHeight * 4, 16); - twiddle_image_uint(texWidth, texHeight, TILE_SIZE, - texture->tiled_data[level], - surface->stride, src); break; default: printf("Cell: twiddle unsupported texture format\n"); diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index b1efe97e76..c951fa6f31 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -301,7 +301,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) */ static void update_tex_masks(struct spu_texture *texture, - const struct pipe_sampler_state *sampler) + const struct pipe_sampler_state *sampler, + uint unit) { uint i; @@ -328,6 +329,11 @@ update_tex_masks(struct spu_texture *texture, texture->level[i].scale_t = spu_splats(1.0f); } } + + /* XXX temporary hack */ + if (texture->target == PIPE_TEXTURE_CUBE) { + spu.sample_texture4[unit] = sample_texture4_cube; + } } @@ -378,7 +384,7 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) ASSERT(0); } - update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); + update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); } @@ -393,6 +399,7 @@ cmd_state_texture(const struct cell_command_texture *texture) DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit); spu.texture[unit].max_level = 0; + spu.texture[unit].target = texture->target; for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { uint width = texture->width[i]; @@ -408,6 +415,10 @@ cmd_state_texture(const struct cell_command_texture *texture) 
spu.texture[unit].level[i].tiles_per_row = (width + TILE_SIZE - 1) / TILE_SIZE; + spu.texture[unit].level[i].bytes_per_image = + 4 * ((width + TILE_SIZE - 1) & ~(TILE_SIZE-1)) + * ((height + TILE_SIZE - 1) & ~(TILE_SIZE-1)); + spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); @@ -415,7 +426,7 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].max_level = i; } - update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); + update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); //Debug=0; } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 66b82f673d..5c3ee305d4 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -106,7 +106,7 @@ spu_txp(vector float s, vector float t, vector float r, vector float q, unsigned unit) { struct vec_4x4 colors; - spu.sample_texture4[unit](s, t, r, q, unit, 0, colors.v); + spu.sample_texture4[unit](s, t, r, q, unit, 0, 0, colors.v); return colors; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 45c6f4ced1..8781041bff 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -68,7 +68,7 @@ typedef void (*spu_sample_texture4_func)(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, + uint unit, uint level, uint face, vector float colors[4]); @@ -113,6 +113,7 @@ struct spu_texture_level void *start; ushort width, height; ushort tiles_per_row; + uint bytes_per_image; /** texcoord scale factors */ vector float scale_s, scale_t; /** texcoord masks (if REPEAT then size-1, else ~0) */ @@ -126,6 +127,7 @@ struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; + uint target; /**< PIPE_TEXTURE_x */ } ALIGN16_ATTRIB; diff --git 
a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index b21c43a467..2570f02c73 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -48,6 +48,9 @@ invalidate_tex_cache(void) uint bytes = 4 * spu.texture[unit].level[lvl].width * spu.texture[unit].level[lvl].height; + if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) + bytes *= 6; + spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); } } @@ -67,11 +70,11 @@ invalidate_tex_cache(void) * a time. */ static void -get_four_texels(uint unit, uint level, vec_int4 x, vec_int4 y, +get_four_texels(uint unit, uint level, uint face, vec_int4 x, vec_int4 y, vec_uint4 *texels) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - const unsigned texture_ea = (uintptr_t) tlevel->start; + unsigned texture_ea = (uintptr_t) tlevel->start; const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ @@ -88,6 +91,8 @@ get_four_texels(uint unit, uint level, vec_int4 x, vec_int4 y, vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); + texture_ea = texture_ea + face * tlevel->bytes_per_image; + spu_dcache_fetch_unaligned((qword *) & texels[0], texture_ea + spu_extract(offset, 0), 4); spu_dcache_fetch_unaligned((qword *) & texels[1], @@ -121,7 +126,8 @@ spu_clamp(vector signed int vec, vector signed int max) void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]) + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; vector float ss = spu_mul(s, tlevel->scale_s); @@ -138,7 +144,7 @@ sample_texture4_nearest(vector float s, vector float t, is = spu_clamp(is, tlevel->max_s); it = 
spu_clamp(it, tlevel->max_t); - get_four_texels(unit, level, is, it, texels); + get_four_texels(unit, level, face, is, it, texels); /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ spu_unpack_A8R8G8B8_transpose4(texels, colors); @@ -152,11 +158,14 @@ sample_texture4_nearest(vector float s, vector float t, void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]) + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - vector float ss = spu_madd(s, tlevel->scale_s, spu_splats(-0.5f)); - vector float tt = spu_madd(t, tlevel->scale_t, spu_splats(-0.5f)); + static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + + vector float ss = spu_madd(s, tlevel->scale_s, half); + vector float tt = spu_madd(t, tlevel->scale_t, half); vector signed int is0 = spu_convts(ss, 0); vector signed int it0 = spu_convts(tt, 0); @@ -179,10 +188,10 @@ sample_texture4_bilinear(vector float s, vector float t, /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, level, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, level, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, level, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, level, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */ /* XXX possibly rework following code to compute the weighted sample * colors with integer arithmetic for fewer int->float conversions. 
@@ -299,10 +308,12 @@ transpose(vector unsigned int *mOut0, void sample_texture4_bilinear_2(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]) + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + /* Scale texcoords by size of texture, and add half pixel bias */ vector float ss = spu_madd(s, tlevel->scale_s, half); vector float tt = spu_madd(t, tlevel->scale_t, half); @@ -339,10 +350,10 @@ sample_texture4_bilinear_2(vector float s, vector float t, /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, level, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, level, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, level, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, level, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ { @@ -433,7 +444,8 @@ compute_lambda(uint unit, vector float s, vector float t) void sample_texture4_lod(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level_ignored, vector float colors[4]) + uint unit, uint level_ignored, uint face, + vector float colors[4]) { /* * Note that we're computing a lambda/lod here that's used for all @@ -452,15 +464,136 @@ sample_texture4_lod(vector float s, vector float t, if (lambda <= 0.0f) { /* magnify */ - spu.mag_sample_texture4[unit](s, t, r, q, unit, 0, colors); + spu.mag_sample_texture4[unit](s, t, r, q, unit, 0, 0, colors); } 
else { /* minify */ int level = (int) (lambda + 0.5f); if (level > (int) spu.texture[unit].max_level) level = spu.texture[unit].max_level; - spu.min_sample_texture4[unit](s, t, r, q, unit, level, colors); + spu.min_sample_texture4[unit](s, t, r, q, unit, level, 0, colors); /* XXX to do: mipmap level interpolation */ } } + +/** XXX need a SIMD version of this */ +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + const float arx = fabsf(rx); + const float ary = fabsf(ry); + const float arz = fabsf(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + *newS = (sc / ma + 1.0F) * 0.5F; + *newT = (tc / ma + 1.0F) * 0.5F; + + return face; +} + + + +void +sample_texture4_cube(vector float s, vector float t, + vector float r, vector float q, + uint unit, uint level, int face_ignored, + vector float colors[4]) +{ + static const vector float zero = {0.0f, 0.0f, 0.0f, 0.0f}; + uint p, faces[4]; + float newS[4], newT[4]; + + /* Compute cube face referenced by the 
four sets of texcoords. + * XXX we should SIMD-ize this. + */ + for (p = 0; p < 4; p++) { + float rx = spu_extract(s, p); + float ry = spu_extract(t, p); + float rz = spu_extract(r, p); + faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]); + } + + if (faces[0] == faces[1] && + faces[0] == faces[2] && + faces[0] == faces[3]) { + /* GOOD! All four texcoords refer to the same cube face */ + s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; + t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; + sample_texture4_nearest(s, t, zero, zero, unit, level, faces[0], colors); + } + else { + /* BAD! The four texcoords refer to different faces */ + for (p = 0; p < 4; p++) { + vector float c[4]; + + sample_texture4_nearest(spu_splats(newS[p]), spu_splats(newT[p]), + zero, zero, unit, level, faces[p], c); + + float red = spu_extract(c[0], p); + float green = spu_extract(c[1], p); + float blue = spu_extract(c[2], p); + float alpha = spu_extract(c[3], p); + + colors[0] = spu_insert(red, colors[0], p); + colors[1] = spu_insert(green, colors[1], p); + colors[2] = spu_insert(blue, colors[2], p); + colors[3] = spu_insert(alpha, colors[3], p); + } + } +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index ec06a50b4a..08b891a4a8 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -39,24 +39,35 @@ invalidate_tex_cache(void); extern void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]); + uint unit, uint level, uint face, + vector float colors[4]); extern void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]); + uint unit, uint level, uint face, + vector float colors[4]); extern void sample_texture4_bilinear_2(vector float s, vector float t, - vector float r, vector float q, - uint unit, 
uint level, vector float colors[4]); + vector float r, vector float q, + uint unit, uint level, uint face, + vector float colors[4]); extern void sample_texture4_lod(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]); + uint unit, uint level, uint face, + vector float colors[4]); + + +extern void +sample_texture4_cube(vector float s, vector float t, + vector float r, vector float q, + uint unit, uint level_ignored, int face_ignored, + vector float colors[4]); #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From 926b8dbb3e86360e5968882df94785ae84d0ad43 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 09:00:05 -0600 Subject: cell: clean up various texture-related things Distinguish among texture targets in codegen. progs/demos/cubemap.c runs correctly now too. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 29 ++++++++++++++--- src/gallium/drivers/cell/spu/spu_command.c | 24 ++++++-------- src/gallium/drivers/cell/spu/spu_funcs.c | 34 +++++++++++++++++--- src/gallium/drivers/cell/spu/spu_main.h | 16 +++++----- src/gallium/drivers/cell/spu/spu_texture.c | 50 ++++++++++++++---------------- src/gallium/drivers/cell/spu/spu_texture.h | 34 +++++++++----------- 6 files changed, 107 insertions(+), 80 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 3dfd5f673d..2b34cf1e23 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1337,16 +1337,33 @@ emit_function_call(struct codegen *gen, static boolean -emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) { - const uint addr = lookup_function(gen->cell, "spu_txp"); + const uint target = inst->InstructionExtTexture.Texture; const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + 
uint addr; int ch; int coord_regs[4], d_regs[4]; + switch (target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_2D: + addr = lookup_function(gen->cell, "spu_tex_2d"); + break; + case TGSI_TEXTURE_3D: + addr = lookup_function(gen->cell, "spu_tex_3d"); + break; + case TGSI_TEXTURE_CUBE: + addr = lookup_function(gen->cell, "spu_tex_cube"); + break; + default: + ASSERT(0 && "unsupported texture target"); + return FALSE; + } + assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); - spe_comment(gen->f, -4, "CALL txp:"); + spe_comment(gen->f, -4, "CALL tex:"); /* get src/dst reg info */ for (ch = 0; ch < 4; ch++) { @@ -1368,7 +1385,7 @@ emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); } - /* setup function arguments */ + /* setup function arguments (XXX depends on target) */ for (i = 0; i < 4; i++) { spe_move(gen->f, 3 + i, coord_regs[i]); } @@ -1674,8 +1691,10 @@ emit_instruction(struct codegen *gen, /* fall-through for now */ case TGSI_OPCODE_TXB: /* fall-through for now */ + case TGSI_OPCODE_TXL: + /* fall-through for now */ case TGSI_OPCODE_TXP: - return emit_TXP(gen, inst); + return emit_TEX(gen, inst); case TGSI_OPCODE_IF: return emit_IF(gen, inst); diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 57d265fef7..ff4a52d79a 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -310,8 +310,7 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) */ static void update_tex_masks(struct spu_texture *texture, - const struct pipe_sampler_state *sampler, - uint unit) + const struct pipe_sampler_state *sampler) { uint i; @@ -338,11 +337,6 @@ update_tex_masks(struct spu_texture *texture, texture->level[i].scale_t = spu_splats(1.0f); } } - - /* XXX temporary hack */ - if (texture->target == PIPE_TEXTURE_CUBE) { - spu.sample_texture4[unit] = sample_texture4_cube; - } 
} @@ -357,12 +351,12 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) switch (spu.sampler[unit].min_img_filter) { case PIPE_TEX_FILTER_LINEAR: - spu.min_sample_texture4[unit] = sample_texture4_bilinear; + spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; case PIPE_TEX_FILTER_ANISO: /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: - spu.min_sample_texture4[unit] = sample_texture4_nearest; + spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; break; default: ASSERT(0); @@ -370,12 +364,12 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) switch (spu.sampler[sampler->unit].mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - spu.mag_sample_texture4[unit] = sample_texture4_bilinear; + spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; case PIPE_TEX_FILTER_ANISO: /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: - spu.mag_sample_texture4[unit] = sample_texture4_nearest; + spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; break; default: ASSERT(0); @@ -384,16 +378,16 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) switch (spu.sampler[sampler->unit].min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: case PIPE_TEX_MIPFILTER_LINEAR: - spu.sample_texture4[unit] = sample_texture4_lod; + spu.sample_texture_2d[unit] = sample_texture_2d_lod; break; case PIPE_TEX_MIPFILTER_NONE: - spu.sample_texture4[unit] = spu.mag_sample_texture4[unit]; + spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit]; break; default: ASSERT(0); } - update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); } @@ -434,7 +428,7 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].max_level = i; } - update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c 
b/src/gallium/drivers/cell/spu/spu_funcs.c index 5c3ee305d4..3534b35000 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -43,6 +43,7 @@ #include "cell/common.h" #include "spu_main.h" #include "spu_funcs.h" +#include "spu_texture.h" /** For "return"-ing four vectors */ @@ -102,11 +103,34 @@ spu_log2(vector float x) static struct vec_4x4 -spu_txp(vector float s, vector float t, vector float r, vector float q, - unsigned unit) +spu_tex_2d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) { struct vec_4x4 colors; - spu.sample_texture4[unit](s, t, r, q, unit, 0, 0, colors.v); + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +static struct vec_4x4 +spu_tex_3d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +static struct vec_4x4 +spu_tex_cube(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) q; + sample_texture_cube(s, t, r, unit, colors.v); return colors; } @@ -147,7 +171,9 @@ return_function_info(void) export_func(&funcs, "spu_pow", &spu_pow); export_func(&funcs, "spu_exp2", &spu_exp2); export_func(&funcs, "spu_log2", &spu_log2); - export_func(&funcs, "spu_txp", &spu_txp); + export_func(&funcs, "spu_tex_2d", &spu_tex_2d); + export_func(&funcs, "spu_tex_3d", &spu_tex_3d); + export_func(&funcs, "spu_tex_cube", &spu_tex_cube); /* Send the function info back to the PPU / main memory */ mfc_put((void *) &funcs, /* src in local store */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 4099e52699..80e9c696f8 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -70,12 +70,10 @@ typedef union { /** Function 
for sampling textures */ -typedef void (*spu_sample_texture4_func)(vector float s, - vector float t, - vector float r, - vector float q, - uint unit, uint level, uint face, - vector float colors[4]); +typedef void (*spu_sample_texture_2d_func)(vector float s, + vector float t, + uint unit, uint level, uint face, + vector float colors[4]); /** Function for performing per-fragment ops */ @@ -183,9 +181,9 @@ struct spu_global spu_fragment_program_func fragment_program; /** Current texture sampler function */ - spu_sample_texture4_func sample_texture4[CELL_MAX_SAMPLERS]; - spu_sample_texture4_func min_sample_texture4[CELL_MAX_SAMPLERS]; - spu_sample_texture4_func mag_sample_texture4[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS]; /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 42eb06a362..04202a7657 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -126,10 +126,9 @@ spu_clamp(vector signed int vec, vector signed int max) * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). */ void -sample_texture4_nearest(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]) +sample_texture_2d_nearest(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; vector float ss = spu_mul(s, tlevel->scale_s); @@ -158,10 +157,9 @@ sample_texture4_nearest(vector float s, vector float t, * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
*/ void -sample_texture4_bilinear(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]) +sample_texture_2d_bilinear(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; @@ -308,10 +306,9 @@ transpose(vector unsigned int *mOut0, * Bilinear filtering, using int intead of float arithmetic */ void -sample_texture4_bilinear_2(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]) +sample_texture_2d_bilinear_int(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; @@ -444,10 +441,9 @@ compute_lambda(uint unit, vector float s, vector float t) * Texture sampling with level of detail selection. 
*/ void -sample_texture4_lod(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level_ignored, uint face, - vector float colors[4]) +sample_texture_2d_lod(vector float s, vector float t, + uint unit, uint level_ignored, uint face, + vector float colors[4]) { /* * Note that we're computing a lambda/lod here that's used for all @@ -455,6 +451,9 @@ sample_texture4_lod(vector float s, vector float t, */ float lambda = compute_lambda(unit, s, t); + (void) face; + (void) level_ignored; + /* apply lod bias */ lambda += spu.sampler[unit].lod_bias; @@ -466,14 +465,14 @@ sample_texture4_lod(vector float s, vector float t, if (lambda <= 0.0f) { /* magnify */ - spu.mag_sample_texture4[unit](s, t, r, q, unit, 0, 0, colors); + spu.mag_sample_texture_2d[unit](s, t, unit, 0, 0, colors); } else { /* minify */ int level = (int) (lambda + 0.5f); if (level > (int) spu.texture[unit].max_level) level = spu.texture[unit].max_level; - spu.min_sample_texture4[unit](s, t, r, q, unit, level, 0, colors); + spu.min_sample_texture_2d[unit](s, t, unit, level, 0, colors); /* XXX to do: mipmap level interpolation */ } } @@ -552,13 +551,10 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) void -sample_texture4_cube(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face_ignored, - vector float colors[4]) +sample_texture_cube(vector float s, vector float t, vector float r, + uint unit, vector float colors[4]) { - static const vector float zero = {0.0f, 0.0f, 0.0f, 0.0f}; - uint p, faces[4]; + uint p, faces[4], level = 0; float newS[4], newT[4]; /* Compute cube face referenced by the four sets of texcoords. @@ -577,15 +573,15 @@ sample_texture4_cube(vector float s, vector float t, /* GOOD! 
All four texcoords refer to the same cube face */ s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; - sample_texture4_nearest(s, t, zero, zero, unit, level, faces[0], colors); + sample_texture_2d_nearest(s, t, unit, level, faces[0], colors); } else { /* BAD! The four texcoords refer to different faces */ for (p = 0; p < 4; p++) { vector float c[4]; - sample_texture4_nearest(spu_splats(newS[p]), spu_splats(newT[p]), - zero, zero, unit, level, faces[p], c); + sample_texture_2d_nearest(spu_splats(newS[p]), spu_splats(newT[p]), + unit, level, faces[p], c); float red = spu_extract(c[0], p); float green = spu_extract(c[1], p); diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index 387484c3ad..7b75b007b5 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -37,37 +37,31 @@ invalidate_tex_cache(void); extern void -sample_texture4_nearest(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]); +sample_texture_2d_nearest(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); extern void -sample_texture4_bilinear(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]); - -extern void -sample_texture4_bilinear_2(vector float s, vector float t, - vector float r, vector float q, +sample_texture_2d_bilinear(vector float s, vector float t, uint unit, uint level, uint face, vector float colors[4]); +extern void +sample_texture_2d_bilinear_int(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + extern void -sample_texture4_lod(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]); +sample_texture_2d_lod(vector float 
s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); extern void -sample_texture4_cube(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level_ignored, uint face_ignored, - vector float colors[4]); +sample_texture_cube(vector float s, vector float t, vector float r, + uint unit, vector float colors[4]); #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From 8b3af5c5d6fe100707da0d9dcc42500921792638 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 12:12:30 -0600 Subject: cell: use simd utilities for pow, exp2, log2 --- src/gallium/drivers/cell/spu/spu_funcs.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_funcs.c') diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 3534b35000..ff3d609d25 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -38,7 +38,9 @@ #include #include #include -#include +#include +#include +#include #include "cell/common.h" #include "spu_main.h" @@ -68,37 +70,19 @@ spu_sin(vector float x) static vector float spu_pow(vector float x, vector float y) { - float z0 = powf(spu_extract(x,0), spu_extract(y,0)); - float z1 = powf(spu_extract(x,1), spu_extract(y,1)); - float z2 = powf(spu_extract(x,2), spu_extract(y,2)); - float z3 = powf(spu_extract(x,3), spu_extract(y,3)); - return (vector float) {z0, z1, z2, z3}; + return _powf4(x, y); } static vector float spu_exp2(vector float x) { - float z0 = powf(2.0f, spu_extract(x,0)); - float z1 = powf(2.0f, spu_extract(x,1)); - float z2 = powf(2.0f, spu_extract(x,2)); - float z3 = powf(2.0f, spu_extract(x,3)); - return (vector float) {z0, z1, z2, z3}; + return _exp2f4(x); } static vector float spu_log2(vector float x) { - /* - * log_base_2(x) = log(x) / log(2) - * 1.442695 = 1/log(2). 
- */ - static const vector float k = {1.442695F, 1.442695F, 1.442695F, 1.442695F}; - float z0 = logf(spu_extract(x,0)); - float z1 = logf(spu_extract(x,1)); - float z2 = logf(spu_extract(x,2)); - float z3 = logf(spu_extract(x,3)); - vector float v = (vector float) {z0, z1, z2, z3}; - return spu_mul(v, k); + return _log2f4(x); } -- cgit v1.2.3