diff options
Diffstat (limited to 'src/gallium')
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 536 | 
1 files changed, 251 insertions, 285 deletions
| diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 01aa3b8f20..78adeab0ae 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -83,30 +83,6 @@ struct lp_build_tgsi_soa_context  /** - * Function call helpers. - */ - -/** - * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be  - * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee - * that the stack pointer is 16 byte aligned, as expected. - */ -static void -emit_func_call( -   struct lp_build_tgsi_soa_context *bld, -   const LLVMValueRef *args, -   unsigned nr_args, -   void (PIPE_CDECL *code)() ) -{ -#if 0 -   LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global, -                             void* Addr); -#endif - -} - - -/**   * Register fetch.   */ @@ -495,7 +471,9 @@ emit_instruction(     struct tgsi_full_instruction *inst )  {     unsigned chan_index; -   LLVMValueRef tmp; +   LLVMValueRef src0, src1, src2; +   LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; +   LLVMValueRef dst0;     /* we can't handle indirect addressing into temp register file yet */     if (indirect_temp_reference(inst)) @@ -505,10 +483,10 @@ emit_instruction(  #if 0     case TGSI_OPCODE_ARL:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index );           emit_flr(bld, 0, 0);           emit_f2it( bld, 0 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break;  #endif @@ -516,7 +494,8 @@ emit_instruction(     case TGSI_OPCODE_MOV:     case TGSI_OPCODE_SWZ:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, chan_index, FETCH( bld, *inst, 0, chan_index ) ); +         tmp0 = FETCH( bld, *inst, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; @@ -524,33 +503,24 @@ emit_instruction(     case TGSI_OPCODE_LIT:        if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||            IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { -         emit_tempf( -            bld, -            0, -            TEMP_ONE_I, -            TEMP_ONE_C); +         tmp0 = bld->base.one;           if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { -            STORE( bld, *inst, 0, 0, CHAN_X ); +            STORE( bld, *inst, 0, CHAN_X, tmp0);           }           if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { -            STORE( bld, *inst, 0, 0, CHAN_W ); +            STORE( bld, *inst, 0, CHAN_W, tmp0);           }        }        if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||            IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {           if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { -            tmp = FETCH( bld, *inst, 0, 0, CHAN_X ); -            sse_maxps( -               bld, -               make_xmm( 0 ), -               get_temp( -                  TGSI_EXEC_TEMP_00000000_I, -                  TGSI_EXEC_TEMP_00000000_C ) ); -            STORE( bld, *inst, 0, 0, CHAN_Y ); +            tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +            tmp0 = lp_build_max( &bld->base, tmp0, bld->base.one); +            STORE( bld, *inst, 0, CHAN_Y, tmp0);           }           if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {              /* XMM[1] = SrcReg[0].yyyy */ -            FETCH( bld, *inst, 1, 0, CHAN_Y ); +            tmp1 = FETCH( bld, *inst, 0, CHAN_Y );              /* XMM[1] = max(XMM[1], 0) */              sse_maxps(                 bld, @@ -559,7 +529,7 @@ emit_instruction(                    TGSI_EXEC_TEMP_00000000_I,                    TGSI_EXEC_TEMP_00000000_C ) );              /* XMM[2] = SrcReg[0].wwww */ -            FETCH( bld, *inst, 2, 0, CHAN_W ); +            tmp2 = FETCH( bld, *inst, 0, CHAN_W );              /* XMM[2] = min(XMM[2], 128.0) */              sse_minps(                 bld, @@ -574,8 +544,8 @@ emit_instruction(                 get_temp(                    TGSI_EXEC_TEMP_MINUS_128_I,                    TGSI_EXEC_TEMP_MINUS_128_C ) ); -            emit_pow( bld, 3, 1, 1, 2 ); -            FETCH( bld, *inst, 0, 0, CHAN_X ); +            tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); +            tmp0 = FETCH( bld, *inst, 0, CHAN_X );              sse_xorps(                 bld,                 make_xmm( 2 ), @@ -589,7 +559,7 @@ emit_instruction(                 bld,                 make_xmm( 2 ),                 make_xmm( 1 ) ); -            STORE( bld, *inst, 2, 0, CHAN_Z ); +            STORE( bld, *inst, 0, CHAN_Z, tmp2);           }        }        break; @@ -597,20 +567,20 @@ emit_instruction(     case TGSI_OPCODE_RCP:     /* TGSI_OPCODE_RECIP */ -      tmp = FETCH( bld, *inst, 0, CHAN_X ); -      tmp = lp_build_rcp(&bld->base, tmp); +      src0 = FETCH( bld, *inst, 0, CHAN_X ); +      dst0 = lp_build_rcp(&bld->base, src0);        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, chan_index, tmp ); +         STORE( bld, *inst, 0, chan_index, dst0 );        }        break;     case TGSI_OPCODE_RSQ:     /* TGSI_OPCODE_RECIPSQRT */ -      tmp = FETCH( bld, *inst, 0, CHAN_X ); -      tmp = lp_build_abs(&bld->base, tmp); -      tmp = lp_build_rsqrt(&bld->base, tmp); +      src0 = FETCH( bld, *inst, 0, CHAN_X ); +      src0 = lp_build_abs(&bld->base, src0); +      dst0 = lp_build_rsqrt(&bld->base, src0);        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, chan_index, tmp ); +         STORE( bld, *inst, 0, chan_index, dst0 );        }        break; @@ -619,34 +589,34 @@ emit_instruction(        if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||            IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||            IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { -         FETCH( bld, *inst, 0, 0, CHAN_X ); +         tmp0 = FETCH( bld, *inst, 0, CHAN_X );           if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||               IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { -            emit_MOV( bld, 1, 0 ); +            tmp1 = tmp0;              emit_flr( bld, 2, 1 );              /* dst.x = ex2(floor(src.x)) */              if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { -               emit_MOV( bld, 2, 1 ); -               emit_ex2( bld, 3, 2 ); -               STORE( bld, *inst, 2, 0, CHAN_X ); +               tmp2 = tmp1; +               tmp2 = lp_build_exp2( &bld->base, tmp2); +               STORE( bld, *inst, 0, CHAN_X, tmp2);              }              /* dst.y = src.x - floor(src.x) */              if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { -               emit_MOV( bld, 2, 0 ); -               emit_sub( bld, 2, 1 ); -               STORE( bld, *inst, 2, 0, CHAN_Y ); +               tmp2 = tmp0; +               tmp2 = lp_build_sub( &bld->base, tmp2, tmp1); +               STORE( bld, *inst, 0, CHAN_Y, tmp2);              }           }           /* dst.z = ex2(src.x) */           if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { -            emit_ex2( bld, 3, 0 ); -            STORE( bld, *inst, 0, 0, CHAN_Z ); +            tmp0 = lp_build_exp2( &bld->base, tmp0); +            STORE( bld, *inst, 0, CHAN_Z, tmp0);           }        }        /* dst.w = 1.0 */        if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { -         emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C ); -         STORE( bld, *inst, 0, 0, CHAN_W ); +         tmp0 = bld->base.one; +         STORE( bld, *inst, 0, CHAN_W, tmp0);        }        break;  #endif @@ -656,97 +626,97 @@ emit_instruction(        if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||            IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||            IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { -         FETCH( bld, *inst, 0, 0, CHAN_X ); -         emit_abs( bld, 0 ); -         emit_MOV( bld, 1, 0 ); -         emit_lg2( bld, 2, 1 ); +         tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +         tmp0 = lp_build_abs( &bld->base, tmp0 ); +         tmp1 = tmp0; +         tmp1 = lp_build_log2( &bld->base, tmp1);           /* dst.z = lg2(abs(src.x)) */           if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { -            STORE( bld, *inst, 1, 0, CHAN_Z ); +            STORE( bld, *inst, 0, CHAN_Z, tmp1);           }           if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||               IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {              emit_flr( bld, 2, 1 );              /* dst.x = floor(lg2(abs(src.x))) */              if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { -               STORE( bld, *inst, 1, 0, CHAN_X ); +               STORE( bld, *inst, 0, CHAN_X, tmp1);              }              /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */              if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { -               emit_ex2( bld, 2, 1 ); +               tmp1 = lp_build_exp2( &bld->base, tmp1);                 emit_rcp( bld, 1, 1 ); -               emit_mul( bld, 0, 1 ); -               STORE( bld, *inst, 0, 0, CHAN_Y ); +               tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); +               STORE( bld, *inst, 0, CHAN_Y, tmp0);              }           }        }        /* dst.w = 1.0 */        if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {           emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C ); -         STORE( bld, *inst, 0, 0, CHAN_W ); +         STORE( bld, *inst, 0, CHAN_W, tmp0);        }        break;  #endif     case TGSI_OPCODE_MUL:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         LLVMValueRef a = FETCH( bld, *inst, 0, chan_index ); -         LLVMValueRef b = FETCH( bld, *inst, 1, chan_index ); -         tmp = lp_build_mul(&bld->base, a, b); -         STORE( bld, *inst, 0, chan_index, tmp ); +         src0 = FETCH( bld, *inst, 0, chan_index ); +         src1 = FETCH( bld, *inst, 1, chan_index ); +         dst0 = lp_build_mul(&bld->base, src0, src1); +         STORE( bld, *inst, 0, chan_index, dst0);        }        break;     case TGSI_OPCODE_ADD:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         LLVMValueRef a = FETCH( bld, *inst, 0, chan_index ); -         LLVMValueRef b = FETCH( bld, *inst, 1, chan_index ); -         tmp = lp_build_add(&bld->base, a, b); -         STORE( bld, *inst, 0, chan_index, tmp ); +         src0 = FETCH( bld, *inst, 0, chan_index ); +         src1 = FETCH( bld, *inst, 1, chan_index ); +         dst0 = lp_build_add(&bld->base, src0, src1); +         STORE( bld, *inst, 0, chan_index, dst0);        }        break; -#if 0     case TGSI_OPCODE_DP3:     /* TGSI_OPCODE_DOT3 */ -      FETCH( bld, *inst, 0, 0, CHAN_X ); -      FETCH( bld, *inst, 1, 1, CHAN_X ); -      emit_mul( bld, 0, 1 ); -      FETCH( bld, *inst, 1, 0, CHAN_Y ); -      FETCH( bld, *inst, 2, 1, CHAN_Y ); -      emit_mul( bld, 1, 2 ); -      emit_add( bld, 0, 1 ); -      FETCH( bld, *inst, 1, 0, CHAN_Z ); -      FETCH( bld, *inst, 2, 1, CHAN_Z ); -      emit_mul( bld, 1, 2 ); -      emit_add( bld, 0, 1 ); +      tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +      tmp1 = FETCH( bld, *inst, 1, CHAN_X ); +      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); +      tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); +      tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); +      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1); +      tmp1 = FETCH( bld, *inst, 0, CHAN_Z ); +      tmp2 = FETCH( bld, *inst, 1, CHAN_Z ); +      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break;     case TGSI_OPCODE_DP4:     /* TGSI_OPCODE_DOT4 */ -      FETCH( bld, *inst, 0, 0, CHAN_X ); -      FETCH( bld, *inst, 1, 1, CHAN_X ); -      emit_mul( bld, 0, 1 ); -      FETCH( bld, *inst, 1, 0, CHAN_Y ); -      FETCH( bld, *inst, 2, 1, CHAN_Y ); -      emit_mul( bld, 1, 2 ); -      emit_add( bld, 0, 1 ); -      FETCH( bld, *inst, 1, 0, CHAN_Z ); -      FETCH( bld, *inst, 2, 1, CHAN_Z ); -      emit_mul(bld, 1, 2 ); -      emit_add(bld, 0, 1 ); -      FETCH( bld, *inst, 1, 0, CHAN_W ); -      FETCH( bld, *inst, 2, 1, CHAN_W ); -      emit_mul( bld, 1, 2 ); -      emit_add( bld, 0, 1 ); +      tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +      tmp1 = FETCH( bld, *inst, 1, CHAN_X ); +      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); +      tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); +      tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); +      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1); +      tmp1 = FETCH( bld, *inst, 0, CHAN_Z ); +      tmp2 = FETCH( bld, *inst, 1, CHAN_Z ); +      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1); +      tmp1 = FETCH( bld, *inst, 0, CHAN_W ); +      tmp2 = FETCH( bld, *inst, 1, CHAN_W ); +      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; +#if 0     case TGSI_OPCODE_DST:        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {           emit_tempf( @@ -754,48 +724,44 @@ emit_instruction(              0,              TEMP_ONE_I,              TEMP_ONE_C ); -         STORE( bld, *inst, 0, 0, CHAN_X ); +         STORE( bld, *inst, 0, CHAN_X, tmp0);        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { -         FETCH( bld, *inst, 0, 0, CHAN_Y ); -         FETCH( bld, *inst, 1, 1, CHAN_Y ); -         emit_mul( bld, 0, 1 ); -         STORE( bld, *inst, 0, 0, CHAN_Y ); +         tmp0 = FETCH( bld, *inst, 0, CHAN_Y ); +         tmp1 = FETCH( bld, *inst, 1, CHAN_Y ); +         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); +         STORE( bld, *inst, 0, CHAN_Y, tmp0);        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { -         FETCH( bld, *inst, 0, 0, CHAN_Z ); -         STORE( bld, *inst, 0, 0, CHAN_Z ); +         tmp0 = FETCH( bld, *inst, 0, CHAN_Z ); +         STORE( bld, *inst, 0, CHAN_Z, tmp0);        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { -         FETCH( bld, *inst, 0, 1, CHAN_W ); -         STORE( bld, *inst, 0, 0, CHAN_W ); +         tmp0 = FETCH( bld, *inst, 1, CHAN_W ); +         STORE( bld, *inst, 0, CHAN_W, tmp0);        }        break; +#endif     case TGSI_OPCODE_MIN:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); -         FETCH( bld, *inst, 1, 1, chan_index ); -         sse_minps( -            bld, -            make_xmm( 0 ), -            make_xmm( 1 ) ); -         STORE( bld, *inst, 0, 0, chan_index ); +         src0 = FETCH( bld, *inst, 0, chan_index ); +         src1 = FETCH( bld, *inst, 1, chan_index ); +         dst0 = lp_build_min( &bld->base, src0, src1 ); +         STORE( bld, *inst, 0, chan_index, dst0);        }        break;     case TGSI_OPCODE_MAX:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); -         FETCH( bld, *inst, 1, 1, chan_index ); -         sse_maxps( -            bld, -            make_xmm( 0 ), -            make_xmm( 1 ) ); -         STORE( bld, *inst, 0, 0, chan_index ); +         src0 = FETCH( bld, *inst, 0, chan_index ); +         src1 = FETCH( bld, *inst, 1, chan_index ); +         dst0 = lp_build_max( &bld->base, src0, src1 ); +         STORE( bld, *inst, 0, chan_index, dst0);        }        break; +#if 0     case TGSI_OPCODE_SLT:     /* TGSI_OPCODE_SETLT */        emit_setcc( bld, inst, cc_LessThan ); @@ -809,33 +775,34 @@ emit_instruction(     case TGSI_OPCODE_MAD:     /* TGSI_OPCODE_MADD */        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); -         FETCH( bld, *inst, 1, 1, chan_index ); -         FETCH( bld, *inst, 2, 2, chan_index ); -         emit_mul( bld, 0, 1 ); -         emit_add( bld, 0, 2 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index ); +         tmp1 = FETCH( bld, *inst, 1, chan_index ); +         tmp2 = FETCH( bld, *inst, 2, chan_index ); +         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); +         tmp0 = lp_build_add( &bld->base, tmp0, tmp2); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break;     case TGSI_OPCODE_SUB:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); -         FETCH( bld, *inst, 1, 1, chan_index ); -         emit_sub( bld, 0, 1 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index ); +         tmp1 = FETCH( bld, *inst, 1, chan_index ); +         tmp0 = lp_build_sub( &bld->base, tmp0, tmp1); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; +#endif     case TGSI_OPCODE_LRP:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); -         FETCH( bld, *inst, 1, 1, chan_index ); -         FETCH( bld, *inst, 2, 2, chan_index ); -         emit_sub( bld, 1, 2 ); -         emit_mul( bld, 0, 1 ); -         emit_add( bld, 0, 2 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         src0 = FETCH( bld, *inst, 0, chan_index ); +         src1 = FETCH( bld, *inst, 1, chan_index ); +         src2 = FETCH( bld, *inst, 2, chan_index ); +         tmp0 = lp_build_sub( &bld->base, src1, src2 ); +         tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); +         dst0 = lp_build_add( &bld->base, tmp0, src2 ); +         STORE( bld, *inst, 0, chan_index, dst0 );        }        break; @@ -848,25 +815,26 @@ emit_instruction(        break;     case TGSI_OPCODE_DP2A: -      FETCH( bld, *inst, 0, 0, CHAN_X );  /* xmm0 = src[0].x */ -      FETCH( bld, *inst, 1, 1, CHAN_X );  /* xmm1 = src[1].x */ -      emit_mul( bld, 0, 1 );              /* xmm0 = xmm0 * xmm1 */ -      FETCH( bld, *inst, 1, 0, CHAN_Y );  /* xmm1 = src[0].y */ -      FETCH( bld, *inst, 2, 1, CHAN_Y );  /* xmm2 = src[1].y */ -      emit_mul( bld, 1, 2 );              /* xmm1 = xmm1 * xmm2 */ -      emit_add( bld, 0, 1 );              /* xmm0 = xmm0 + xmm1 */ -      FETCH( bld, *inst, 1, 2, CHAN_X );  /* xmm1 = src[2].x */ -      emit_add( bld, 0, 1 );              /* xmm0 = xmm0 + xmm1 */ +      tmp0 = FETCH( bld, *inst, 0, CHAN_X );  /* xmm0 = src[0].x */ +      tmp1 = FETCH( bld, *inst, 1, CHAN_X );  /* xmm1 = src[1].x */ +      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */ +      tmp1 = FETCH( bld, *inst, 0, CHAN_Y );  /* xmm1 = src[0].y */ +      tmp2 = FETCH( bld, *inst, 1, CHAN_Y );  /* xmm2 = src[1].y */ +      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */ +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */ +      tmp1 = FETCH( bld, *inst, 2, CHAN_X );  /* xmm1 = src[2].x */ +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index );  /* dest[ch] = xmm0 */ +         STORE( bld, *inst, 0, chan_index, tmp0);  /* dest[ch] = xmm0 */        }        break; +#if 0     case TGSI_OPCODE_FRC:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index );           emit_frc( bld, 0, 0 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; @@ -876,97 +844,94 @@ emit_instruction(     case TGSI_OPCODE_FLR:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index );           emit_flr( bld, 0, 0 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break;     case TGSI_OPCODE_ROUND:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index );           emit_rnd( bld, 0, 0 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; +#endif -   case TGSI_OPCODE_EX2: -      FETCH( bld, *inst, 0, 0, CHAN_X ); -      emit_ex2( bld, 0, 0 ); +   case TGSI_OPCODE_EX2: { +      tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +      tmp0 = lp_build_exp2( &bld->base, tmp0);        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; +   }     case TGSI_OPCODE_LG2: -      FETCH( bld, *inst, 0, 0, CHAN_X ); -      emit_lg2( bld, 0, 0 ); +      tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +      tmp0 = lp_build_log2( &bld->base, tmp0);        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break;     case TGSI_OPCODE_POW: -      FETCH( bld, *inst, 0, 0, CHAN_X ); -      FETCH( bld, *inst, 1, 1, CHAN_X ); -      emit_pow( bld, 0, 0, 0, 1 ); +      src0 = FETCH( bld, *inst, 0, CHAN_X ); +      src1 = FETCH( bld, *inst, 1, CHAN_X ); +      dst0 = lp_build_pow( &bld->base, src0, src1 );        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, dst0 );        }        break;     case TGSI_OPCODE_XPD:        if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||            IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { -         FETCH( bld, *inst, 1, 1, CHAN_Z ); -         FETCH( bld, *inst, 3, 0, CHAN_Z ); +         tmp1 = FETCH( bld, *inst, 1, CHAN_Z ); +         tmp3 = FETCH( bld, *inst, 0, CHAN_Z );        }        if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||            IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { -         FETCH( bld, *inst, 0, 0, CHAN_Y ); -         FETCH( bld, *inst, 4, 1, CHAN_Y ); +         tmp0 = FETCH( bld, *inst, 0, CHAN_Y ); +         tmp4 = FETCH( bld, *inst, 1, CHAN_Y );        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { -         emit_MOV( bld, 2, 0 ); -         emit_mul( bld, 2, 1 ); -         emit_MOV( bld, 5, 3 ); -         emit_mul( bld, 5, 4 ); -         emit_sub( bld, 2, 5 ); -         STORE( bld, *inst, 2, 0, CHAN_X ); +         tmp2 = tmp0; +         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); +         tmp5 = tmp3; +         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); +         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); +         STORE( bld, *inst, 0, CHAN_X, tmp2);        }        if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||            IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { -         FETCH( bld, *inst, 2, 1, CHAN_X ); -         FETCH( bld, *inst, 5, 0, CHAN_X ); +         tmp2 = FETCH( bld, *inst, 1, CHAN_X ); +         tmp5 = FETCH( bld, *inst, 0, CHAN_X );        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { -         emit_mul( bld, 3, 2 ); -         emit_mul( bld, 1, 5 ); -         emit_sub( bld, 3, 1 ); -         STORE( bld, *inst, 3, 0, CHAN_Y ); +         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); +         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); +         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); +         STORE( bld, *inst, 0, CHAN_Y, tmp3);        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { -         emit_mul( bld, 5, 4 ); -         emit_mul( bld, 0, 2 ); -         emit_sub( bld, 5, 0 ); -         STORE( bld, *inst, 5, 0, CHAN_Z ); +         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); +         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); +         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); +         STORE( bld, *inst, 0, CHAN_Z, tmp5);        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { -	 emit_tempf( -	    bld, -	    0, -	    TEMP_ONE_I, -	    TEMP_ONE_C ); -         STORE( bld, *inst, 0, 0, CHAN_W ); +         tmp0 = bld->base.one; +         STORE( bld, *inst, 0, CHAN_W, tmp0);        }        break;     case TGSI_OPCODE_ABS:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); -         emit_abs( bld, 0) ; - -         STORE( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index ); +         tmp0 = lp_build_abs( &bld->base, tmp0 ) ; +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; @@ -975,29 +940,29 @@ emit_instruction(        break;     case TGSI_OPCODE_DPH: -      FETCH( bld, *inst, 0, 0, CHAN_X ); -      FETCH( bld, *inst, 1, 1, CHAN_X ); -      emit_mul( bld, 0, 1 ); -      FETCH( bld, *inst, 1, 0, CHAN_Y ); -      FETCH( bld, *inst, 2, 1, CHAN_Y ); -      emit_mul( bld, 1, 2 ); -      emit_add( bld, 0, 1 ); -      FETCH( bld, *inst, 1, 0, CHAN_Z ); -      FETCH( bld, *inst, 2, 1, CHAN_Z ); -      emit_mul( bld, 1, 2 ); -      emit_add( bld, 0, 1 ); -      FETCH( bld, *inst, 1, 1, CHAN_W ); -      emit_add( bld, 0, 1 ); +      tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +      tmp1 = FETCH( bld, *inst, 1, CHAN_X ); +      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); +      tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); +      tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); +      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1); +      tmp1 = FETCH( bld, *inst, 0, CHAN_Z ); +      tmp2 = FETCH( bld, *inst, 1, CHAN_Z ); +      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1); +      tmp1 = FETCH( bld, *inst, 1, CHAN_W ); +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break;     case TGSI_OPCODE_COS: -      FETCH( bld, *inst, 0, 0, CHAN_X ); -      emit_cos( bld, 0, 0 ); +      tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +      tmp0 = lp_build_cos( &bld->base, tmp0 );        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; @@ -1009,6 +974,7 @@ emit_instruction(        return 0;        break; +#if 0     case TGSI_OPCODE_KILP:        /* predicated kill */        emit_kilp( bld ); @@ -1019,6 +985,7 @@ emit_instruction(        /* conditional kill */        emit_kil( bld, &inst->FullSrcRegisters[0] );        break; +#endif     case TGSI_OPCODE_PK2H:        return 0; @@ -1053,10 +1020,10 @@ emit_instruction(        break;     case TGSI_OPCODE_SIN: -      FETCH( bld, *inst, 0, 0, CHAN_X ); -      emit_sin( bld, 0, 0 ); +      tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +      tmp0 = lp_build_sin( &bld->base, tmp0 );        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; @@ -1071,13 +1038,11 @@ emit_instruction(     case TGSI_OPCODE_STR:        return 0;        break; -#endif     case TGSI_OPCODE_TEX:        emit_tex( bld, inst, FALSE, FALSE );        break; -#if 0     case TGSI_OPCODE_TXD:        return 0;        break; @@ -1106,14 +1071,16 @@ emit_instruction(        return 0;        break; +#if 0     case TGSI_OPCODE_ARR:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index );           emit_rnd( bld, 0, 0 );           emit_f2it( bld, 0 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; +#endif     case TGSI_OPCODE_BRA:        return 0; @@ -1123,6 +1090,7 @@ emit_instruction(        return 0;        break; +#if 0     case TGSI_OPCODE_RET:        emit_ret( bld );        break; @@ -1135,9 +1103,9 @@ emit_instruction(     case TGSI_OPCODE_SSG:     /* TGSI_OPCODE_SGN */        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index );           emit_sgn( bld, 0, 0 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; @@ -1147,14 +1115,14 @@ emit_instruction(     case TGSI_OPCODE_SCS:        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { -         FETCH( bld, *inst, 0, 0, CHAN_X ); -         emit_cos( bld, 0, 0 ); -         STORE( bld, *inst, 0, 0, CHAN_X ); +         tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +         tmp0 = lp_build_cos( &bld->base, tmp0 ); +         STORE( bld, *inst, 0, CHAN_X, tmp0);        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { -         FETCH( bld, *inst, 0, 0, CHAN_X ); -         emit_sin( bld, 0, 0 ); -         STORE( bld, *inst, 0, 0, CHAN_Y ); +         tmp0 = FETCH( bld, *inst, 0, CHAN_X ); +         tmp0 = lp_build_sin( &bld->base, tmp0 ); +         STORE( bld, *inst, 0, CHAN_Y, tmp0);        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {  	 emit_tempf( @@ -1162,7 +1130,7 @@ emit_instruction(  	    0,  	    TGSI_EXEC_TEMP_00000000_I,  	    TGSI_EXEC_TEMP_00000000_C ); -         STORE( bld, *inst, 0, 0, CHAN_Z ); +         STORE( bld, *inst, 0, CHAN_Z, tmp0);        }        IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {  	 emit_tempf( @@ -1170,7 +1138,7 @@ emit_instruction(  	    0,  	    TEMP_ONE_I,  	    TEMP_ONE_C ); -         STORE( bld, *inst, 0, 0, CHAN_W ); +         STORE( bld, *inst, 0, CHAN_W, tmp0);        }        break;  #endif @@ -1179,7 +1147,6 @@ emit_instruction(        emit_tex( bld, inst, TRUE, FALSE );        break; -#if 0     case TGSI_OPCODE_NRM:        /* fall-through */     case TGSI_OPCODE_NRM4: @@ -1196,73 +1163,73 @@ emit_instruction(              /* xmm4 = src.x */              /* xmm0 = src.x * src.x */ -            FETCH(bld, *inst, 0, 0, CHAN_X); +            tmp0 = FETCH(bld, *inst, 0, CHAN_X);              if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { -               emit_MOV(bld, 4, 0); +               tmp4 = tmp0;              } -            emit_mul(bld, 0, 0); +            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);              /* xmm5 = src.y */              /* xmm0 = xmm0 + src.y * src.y */ -            FETCH(bld, *inst, 1, 0, CHAN_Y); +            tmp1 = FETCH(bld, *inst, 0, CHAN_Y);              if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { -               emit_MOV(bld, 5, 1); +               tmp5 = tmp1;              } -            emit_mul(bld, 1, 1); -            emit_add(bld, 0, 1); +            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); +            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm6 = src.z */              /* xmm0 = xmm0 + src.z * src.z */ -            FETCH(bld, *inst, 1, 0, CHAN_Z); +            tmp1 = FETCH(bld, *inst, 0, CHAN_Z);              if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { -               emit_MOV(bld, 6, 1); +               tmp6 = tmp1;              } -            emit_mul(bld, 1, 1); -            emit_add(bld, 0, 1); +            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); +            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              if (dims == 4) {                 /* xmm7 = src.w */                 /* xmm0 = xmm0 + src.w * src.w */ -               FETCH(bld, *inst, 1, 0, CHAN_W); +               tmp1 = FETCH(bld, *inst, 0, CHAN_W);                 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { -                  emit_MOV(bld, 7, 1); +                  tmp7 = tmp1;                 } -               emit_mul(bld, 1, 1); -               emit_add(bld, 0, 1); +               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); +               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              }              /* xmm1 = 1 / sqrt(xmm0) */ -            emit_rsqrt(bld, 1, 0); +            tmp1 = lp_build_rsqrt( &bld->base, tmp0);              /* dst.x = xmm1 * src.x */              if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { -               emit_mul(bld, 4, 1); -               STORE(bld, *inst, 4, 0, CHAN_X); +               tmp4 = lp_build_mul( &bld->base, tmp4, tmp1); +               STORE(bld, *inst, 0, CHAN_X, tmp4);              }              /* dst.y = xmm1 * src.y */              if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { -               emit_mul(bld, 5, 1); -               STORE(bld, *inst, 5, 0, CHAN_Y); +               tmp5 = lp_build_mul( &bld->base, tmp5, tmp1); +               STORE(bld, *inst, 0, CHAN_Y, tmp5);              }              /* dst.z = xmm1 * src.z */              if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { -               emit_mul(bld, 6, 1); -               STORE(bld, *inst, 6, 0, CHAN_Z); +               tmp6 = lp_build_mul( &bld->base, tmp6, tmp1); +               STORE(bld, *inst, 0, CHAN_Z, tmp6);              }              /* dst.w = xmm1 * src.w */              if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) { -               emit_mul(bld, 7, 1); -               STORE(bld, *inst, 7, 0, CHAN_W); +               tmp7 = lp_build_mul( &bld->base, tmp7, tmp1); +               STORE(bld, *inst, 0, CHAN_W, tmp7);              }           }           /* dst0.w = 1.0 */           if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) { -            emit_tempf(bld, 0, TEMP_ONE_I, TEMP_ONE_C); -            STORE(bld, *inst, 0, 0, CHAN_W); +            tmp0 = bld->base.one; +            STORE(bld, *inst, 0, CHAN_W, tmp0);           }        }        break; @@ -1272,18 +1239,17 @@ emit_instruction(        break;     case TGSI_OPCODE_DP2: -      FETCH( bld, *inst, 0, 0, CHAN_X );  /* xmm0 = src[0].x */ -      FETCH( bld, *inst, 1, 1, CHAN_X );  /* xmm1 = src[1].x */ -      emit_mul( bld, 0, 1 );              /* xmm0 = xmm0 * xmm1 */ -      FETCH( bld, *inst, 1, 0, CHAN_Y );  /* xmm1 = src[0].y */ -      FETCH( bld, *inst, 2, 1, CHAN_Y );  /* xmm2 = src[1].y */ -      emit_mul( bld, 1, 2 );              /* xmm1 = xmm1 * xmm2 */ -      emit_add( bld, 0, 1 );              /* xmm0 = xmm0 + xmm1 */ +      tmp0 = FETCH( bld, *inst, 0, CHAN_X );  /* xmm0 = src[0].x */ +      tmp1 = FETCH( bld, *inst, 1, CHAN_X );  /* xmm1 = src[1].x */ +      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */ +      tmp1 = FETCH( bld, *inst, 0, CHAN_Y );  /* xmm1 = src[0].y */ +      tmp2 = FETCH( bld, *inst, 1, CHAN_Y );  /* xmm2 = src[1].y */ +      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */ +      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         STORE( bld, *inst, 0, 0, chan_index );  /* dest[ch] = xmm0 */ +         STORE( bld, *inst, 0, chan_index, tmp0);  /* dest[ch] = xmm0 */        }        break; -#endif     case TGSI_OPCODE_TXL:        emit_tex( bld, inst, TRUE, FALSE ); @@ -1293,7 +1259,6 @@ emit_instruction(        emit_tex( bld, inst, FALSE, TRUE );        break; -#if 0     case TGSI_OPCODE_BRK:        return 0;        break; @@ -1346,14 +1311,16 @@ emit_instruction(        return 0;        break; +#if 0     case TGSI_OPCODE_TRUNC:        FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -         FETCH( bld, *inst, 0, 0, chan_index ); +         tmp0 = FETCH( bld, *inst, 0, chan_index );           emit_f2it( bld, 0 );           emit_i2f( bld, 0 ); -         STORE( bld, *inst, 0, 0, chan_index ); +         STORE( bld, *inst, 0, chan_index, tmp0);        }        break; +#endif     case TGSI_OPCODE_SHL:        return 0; @@ -1402,7 +1369,6 @@ emit_instruction(     case TGSI_OPCODE_ENDPRIM:        return 0;        break; -#endif     default:        return 0; | 
