From c51938afe1626bfccfe38fe2f508bf90e58ca74c Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 16 Feb 2009 08:25:33 -0700
Subject: cell: use some SPU intrinsics to get slightly better code in eval_inputs()

Suggested by Jonathan Adamczewski.
There may be more places to do this...
---
 src/gallium/drivers/cell/spu/spu_tri.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers/cell/spu')

diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 04e4584b25..d727268475 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -219,10 +219,13 @@ eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[])
          /* linear term */
          vector float dadx = setup.coef[attr].dadx;
          vector float dady = setup.coef[attr].dady;
-         r0 += fragX * splatx(dadx) + fragY * splatx(dady);
-         r1 += fragX * splaty(dadx) + fragY * splaty(dady);
-         r2 += fragX * splatz(dadx) + fragY * splatz(dady);
-         r3 += fragX * splatw(dadx) + fragY * splatw(dady);
+         /* Use SPU intrinsics here to get slightly better code.
+          * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady);
+          */
+         r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0));
+         r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1));
+         r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2));
+         r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3));
          if (interp == INTERP_PERSPECTIVE) {
             /* perspective term */
             r0 *= wInv;
--
cgit v1.2.3