diff options
| -rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_aos.c | 28 |
| -rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 |
| -rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 1 |
3 files changed, 20 insertions, 10 deletions
| diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 0c693a4a65..f4c6705bae 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1559,7 +1559,6 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst   */  static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )  { -     if (0) {        struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);        struct x86_reg r = aos_get_xmm_reg(cp); @@ -1568,21 +1567,30 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst        return TRUE;     }     else { -      struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); -      struct x86_reg r = aos_get_xmm_reg(cp); +      struct x86_reg arg0           = fetch_src(cp, &op->FullSrcRegisters[0]); +      struct x86_reg r              = aos_get_xmm_reg(cp);        struct x86_reg neg_half       = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );        struct x86_reg one_point_five = x86_make_disp( neg_half, 4 );        struct x86_reg src            = get_xmm_writable( cp, arg0 ); -       -      sse_rsqrtss( cp->func, r, src  );             /* rsqrtss(a) */ -      sse_mulss(   cp->func, src, neg_half  );      /* -.5 * a */ -      sse_mulss(   cp->func, src,  r );             /* -.5 * a * r */ -      sse_mulss(   cp->func, src,  r );             /* -.5 * a * r * r */ -      sse_addss(   cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */ -      sse_mulss(   cp->func, r,  src );             /* r * (1.5 - .5 * a * r * r) */ +      struct x86_reg neg            = aos_get_internal(cp, IMM_NEGS); +      struct x86_reg tmp            = aos_get_xmm_reg(cp); + +      sse_movaps(cp->func, tmp, src); +      sse_mulps(cp->func, tmp, neg); +      sse_maxps(cp->func, tmp, src); +    +      sse_rsqrtss( cp->func, r, tmp  );             /* rsqrtss(a) */ +      sse_mulss(   cp->func, 
tmp, neg_half  );      /* -.5 * a */ +      sse_mulss(   cp->func, tmp,  r );             /* -.5 * a * r */ +      sse_mulss(   cp->func, tmp,  r );             /* -.5 * a * r * r */ +      sse_addss(   cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */ +      sse_mulss(   cp->func, r,  tmp );             /* r * (1.5 - .5 * a * r * r) */        store_scalar_dest(cp, &op->FullDstRegisters[0], r); + +      aos_release_xmm_reg(cp, tmp.idx); +        return TRUE;     }  } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index ab641efb60..5c5d8d2550 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1889,6 +1889,7 @@ exec_instruction(     case TGSI_OPCODE_RSQ:     /* TGSI_OPCODE_RECIPSQRT */        FETCH( &r[0], 0, CHAN_X ); +      micro_abs( &r[0], &r[0] );        micro_sqrt( &r[0], &r[0] );        micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );        FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 481ba89c5e..a183603aea 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1575,6 +1575,7 @@ emit_instruction(     case TGSI_OPCODE_RSQ:     /* TGSI_OPCODE_RECIPSQRT */        FETCH( func, *inst, 0, 0, CHAN_X ); +      emit_abs( func, 0 );        emit_rsqrt( func, 1, 0 );        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {           STORE( func, *inst, 1, 0, chan_index ); | 
