summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
authorDave Airlie <airlied@linux.ie>2009-04-22 21:41:57 +1000
committerDave Airlie <airlied@linux.ie>2009-04-22 21:41:57 +1000
commit466c78c93538f2853449124c06274d538830cd5a (patch)
treea6bd88060ba328d8bed1ff2ea1a37ee741a90a17 /src/gallium/auxiliary/tgsi
parent65fe0c86ffcff99f9f09606d462bf3731ea0c308 (diff)
parentf057f6543da469f231d551cb5728d98df8add4fa (diff)
Merge remote branch 'origin/master' into radeon-rewrite
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c104
1 files changed, 78 insertions, 26 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index d70bcd03c5..4b4e34b29e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1477,6 +1477,7 @@ emit_instruction(
case TGSI_OPCODE_ARL:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
+ emit_flr(func, 0, 0);
emit_f2it( func, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1553,7 +1554,7 @@ emit_instruction(
func,
make_xmm( 2 ),
make_xmm( 0 ),
- cc_LessThanEqual );
+ cc_LessThan );
sse_andps(
func,
make_xmm( 2 ),
@@ -2177,32 +2178,83 @@ emit_instruction(
/* 3 or 4-component normalization */
{
uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
- /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */
- FETCH( func, *inst, 4, 0, CHAN_X ); /* xmm4 = src[0].x */
- FETCH( func, *inst, 5, 0, CHAN_Y ); /* xmm5 = src[0].y */
- FETCH( func, *inst, 6, 0, CHAN_Z ); /* xmm6 = src[0].z */
- if (dims == 4) {
- FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */
- }
- emit_MOV( func, 0, 4 ); /* xmm0 = xmm3 */
- emit_mul( func, 0, 4 ); /* xmm0 *= xmm3 */
- emit_MOV( func, 1, 5 ); /* xmm1 = xmm4 */
- emit_mul( func, 1, 5 ); /* xmm1 *= xmm4 */
- emit_add( func, 0, 1 ); /* xmm0 += xmm1 */
- emit_MOV( func, 1, 6 ); /* xmm1 = xmm5 */
- emit_mul( func, 1, 6 ); /* xmm1 *= xmm5 */
- emit_add( func, 0, 1 ); /* xmm0 += xmm1 */
- if (dims == 4) {
- emit_MOV( func, 1, 7 ); /* xmm1 = xmm7 */
- emit_mul( func, 1, 7 ); /* xmm1 *= xmm7 */
- emit_add( func, 0, 0 ); /* xmm0 += xmm1 */
- }
- emit_rsqrt( func, 1, 0 ); /* xmm1 = 1/sqrt(xmm0) */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- if (chan_index < dims) {
- emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */
- STORE( func, *inst, 4+chan_index, 0, chan_index );
+
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) ||
+ (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) {
+
+ /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
+
+ /* xmm4 = src.x */
+ /* xmm0 = src.x * src.x */
+ FETCH(func, *inst, 0, 0, CHAN_X);
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ emit_MOV(func, 4, 0);
}
+ emit_mul(func, 0, 0);
+
+ /* xmm5 = src.y */
+ /* xmm0 = xmm0 + src.y * src.y */
+ FETCH(func, *inst, 1, 0, CHAN_Y);
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ emit_MOV(func, 5, 1);
+ }
+ emit_mul(func, 1, 1);
+ emit_add(func, 0, 1);
+
+ /* xmm6 = src.z */
+ /* xmm0 = xmm0 + src.z * src.z */
+ FETCH(func, *inst, 1, 0, CHAN_Z);
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ emit_MOV(func, 6, 1);
+ }
+ emit_mul(func, 1, 1);
+ emit_add(func, 0, 1);
+
+ if (dims == 4) {
+ /* xmm7 = src.w */
+ /* xmm0 = xmm0 + src.w * src.w */
+ FETCH(func, *inst, 1, 0, CHAN_W);
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ emit_MOV(func, 7, 1);
+ }
+ emit_mul(func, 1, 1);
+ emit_add(func, 0, 1);
+ }
+
+ /* xmm1 = 1 / sqrt(xmm0) */
+ emit_rsqrt(func, 1, 0);
+
+ /* dst.x = xmm1 * src.x */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ emit_mul(func, 4, 1);
+ STORE(func, *inst, 4, 0, CHAN_X);
+ }
+
+ /* dst.y = xmm1 * src.y */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ emit_mul(func, 5, 1);
+ STORE(func, *inst, 5, 0, CHAN_Y);
+ }
+
+ /* dst.z = xmm1 * src.z */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ emit_mul(func, 6, 1);
+ STORE(func, *inst, 6, 0, CHAN_Z);
+ }
+
+ /* dst.w = xmm1 * src.w */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) {
+ emit_mul(func, 7, 1);
+ STORE(func, *inst, 7, 0, CHAN_W);
+ }
+ }
+
+ /* dst0.w = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) {
+ emit_tempf(func, 0, TEMP_ONE_I, TEMP_ONE_C);
+ STORE(func, *inst, 0, 0, CHAN_W);
}
}
break;