diff options
-rw-r--r-- | configs/linux-cell | 2 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_funcs.c | 65 |
2 files changed, 45 insertions, 22 deletions
diff --git a/configs/linux-cell b/configs/linux-cell index 86651b83d7..8d74ee469d 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -53,7 +53,7 @@ SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main \ -DSPU_MAIN_PARAM_LONG_LONG \ -include spu_intrinsics.h -SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc +SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc -lm SPU_AR = ppu-ar SPU_AR_FLAGS = -qcs diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index b57ad3f3b8..1adf9de0e8 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -35,41 +35,61 @@ #include <string.h> #include <libmisc.h> -#include <cos8_v.h> -#include <sin8_v.h> +#include <math.h> +#include <cos14_v.h> +#include <sin14_v.h> #include "cell/common.h" #include "spu_main.h" #include "spu_funcs.h" -#define M_PI 3.1415926 - - static vector float spu_cos(vector float x) { -#if 0 - static const float scale = 1.0 / (2.0 * M_PI); - x = x * spu_splats(scale); /* normalize */ - return _cos8_v(x); -#else - /* just pass-through to avoid trashing caller's stack */ - return x; -#endif + return _cos14_v(x); } static vector float spu_sin(vector float x) { -#if 0 - static const float scale = 1.0 / (2.0 * M_PI); - x = x * spu_splats(scale); /* normalize */ - return _sin8_v(x); /* 8-bit accuracy enough?? */ -#else - /* just pass-through to avoid trashing caller's stack */ - return x; -#endif + return _sin14_v(x); +} + +static vector float +spu_pow(vector float x, vector float y) +{ + float z0 = powf(spu_extract(x,0), spu_extract(y,0)); + float z1 = powf(spu_extract(x,1), spu_extract(y,1)); + float z2 = powf(spu_extract(x,2), spu_extract(y,2)); + float z3 = powf(spu_extract(x,3), spu_extract(y,3)); + return (vector float) {z0, z1, z2, z3}; +} + +static vector float +spu_exp2(vector float x) +{ + float z0 = powf(2.0f, spu_extract(x,0)); + float z1 = powf(2.0f, spu_extract(x,1)); + float z2 = powf(2.0f, spu_extract(x,2)); + float z3 = powf(2.0f, spu_extract(x,3)); + return (vector float) {z0, z1, z2, z3}; +} + +static vector float +spu_log2(vector float x) +{ + /* + * log_base_2(x) = log(x) / log(2) + * 1.442695 = 1/log(2). + */ + static const vector float k = {1.442695F, 1.442695F, 1.442695F, 1.442695F}; + float z0 = logf(spu_extract(x,0)); + float z1 = logf(spu_extract(x,1)); + float z2 = logf(spu_extract(x,2)); + float z3 = logf(spu_extract(x,3)); + vector float v = (vector float) {z0, z1, z2, z3}; + return spu_mul(v, k); } @@ -101,6 +121,9 @@ return_function_info(void) funcs.num = 0; add_func(&funcs, "spu_cos", &spu_cos); add_func(&funcs, "spu_sin", &spu_sin); + add_func(&funcs, "spu_pow", &spu_pow); + add_func(&funcs, "spu_exp2", &spu_exp2); + add_func(&funcs, "spu_log2", &spu_log2); /* Send the function info back to the PPU / main memory */ mfc_put((void *) &funcs, /* src in local store */ |