diff options
| author | Pierre Ossman <pierre@ossman.eu> | 2009-11-01 22:28:02 +0100 | 
|---|---|---|
| committer | Alex Deucher <alexdeucher@gmail.com> | 2009-11-02 11:30:00 -0500 | 
| commit | 213ec8251cb3e859b41306eae4092d89592c33db (patch) | |
| tree | 2164500bac5566b4b4df74148da6567c7879ca3c | |
| parent | d00cbba403640c82683a876fa795cd638f1bbc24 (diff) | |
r600: implement LOG op in compiler
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.c | 216 |
| -rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.h | 1 |
2 files changed, 214 insertions, 3 deletions
| diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index fbf1f29fa3..e0d7d4fa6b 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3037,6 +3037,217 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)      return GL_TRUE;  } +GLboolean assemble_LOG(r700_AssemblerBase *pAsm) +{ +    BITS tmp1, tmp2, tmp3; + +    checkop1(pAsm); + +    tmp1 = gethelpr(pAsm); +    tmp2 = gethelpr(pAsm); +    tmp3 = gethelpr(pAsm); + +    // FIXME: The hardware can do fabs() directly on input +    //        elements, but the compiler doesn't have the +    //        capability to use that. + +    // MAX     tmp1.x,   a.x,    -a.x   (fabs(a.x)) + +    pAsm->D.dst.opcode = SQ_OP2_INST_MAX;   + +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); +    pAsm->D.dst.rtype  = DST_REG_TEMPORARY; +    pAsm->D.dst.reg    = tmp1; +    pAsm->D.dst.writex = 1; + +    if( GL_FALSE == assemble_src(pAsm, 0, -1) ) +    { +        return GL_FALSE; +    } +  +    pAsm->S[1].bits = pAsm->S[0].bits; +    flipneg_PVSSRC(&(pAsm->S[1].src)); + +    if ( GL_FALSE == next_ins(pAsm) )  +    { +        return GL_FALSE; +    } + +    // Entire algo: +    // +    // LG2     tmp2.x,   tmp1.x +    // FLOOR   tmp3.x,   tmp2.x +    // MOV     dst.x,    tmp3.x +    // ADD     tmp3.x,   tmp2.x,    -tmp3.x +    // EX2     dst.y,    tmp3.x +    // MOV     dst.z,    tmp2.x +    // MOV     dst.w,    1.0 + +    // LG2     tmp2.x,   tmp1.x +    // FLOOR   tmp3.x,   tmp2.x + +    pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; +    pAsm->D.dst.math = 1; + +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); +    pAsm->D.dst.rtype  = DST_REG_TEMPORARY; +    pAsm->D.dst.reg    = tmp2; +    pAsm->D.dst.writex = 1; + +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); +    pAsm->S[0].src.rtype = DST_REG_TEMPORARY; +    pAsm->S[0].src.reg   = tmp1; + +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); +    
noneg_PVSSRC(&(pAsm->S[0].src)); + +    if( GL_FALSE == next_ins(pAsm) ) +    { +        return GL_FALSE; +    } + +    pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); +    pAsm->D.dst.rtype  = DST_REG_TEMPORARY; +    pAsm->D.dst.reg    = tmp3; +    pAsm->D.dst.writex = 1; + +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); +    pAsm->S[0].src.rtype = DST_REG_TEMPORARY; +    pAsm->S[0].src.reg   = tmp2; + +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); +    noneg_PVSSRC(&(pAsm->S[0].src)); + +    if( GL_FALSE == next_ins(pAsm) ) +    { +        return GL_FALSE; +    } + +    // MOV     dst.x,    tmp3.x + +    pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + +    if( GL_FALSE == assemble_dst(pAsm) ) +    { +        return GL_FALSE; +    } + +    pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); +    pAsm->S[0].src.rtype = DST_REG_TEMPORARY; +    pAsm->S[0].src.reg   = tmp3; + +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); +    noneg_PVSSRC(&(pAsm->S[0].src)); + +    if( GL_FALSE == next_ins(pAsm) ) +    { +        return GL_FALSE; +    } + +    // ADD     tmp3.x,   tmp2.x,    -tmp3.x +    // EX2     dst.y,    tmp3.x + +    pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); +    pAsm->D.dst.rtype  = DST_REG_TEMPORARY; +    pAsm->D.dst.reg    = tmp3; +    pAsm->D.dst.writex = 1; + +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); +    pAsm->S[0].src.rtype = DST_REG_TEMPORARY; +    pAsm->S[0].src.reg   = tmp2; + +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); +    noneg_PVSSRC(&(pAsm->S[0].src)); + +    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); +    pAsm->S[1].src.rtype = DST_REG_TEMPORARY; +    pAsm->S[1].src.reg   = tmp3; + +    setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); +    neg_PVSSRC(&(pAsm->S[1].src)); + +    if( GL_FALSE == next_ins(pAsm) ) +    { +    
    return GL_FALSE; +    } + +    pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; +    pAsm->D.dst.math = 1; + +    if( GL_FALSE == assemble_dst(pAsm) ) +    { +        return GL_FALSE; +    } + +    pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); +    pAsm->S[0].src.rtype = DST_REG_TEMPORARY; +    pAsm->S[0].src.reg   = tmp3; + +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); +    noneg_PVSSRC(&(pAsm->S[0].src)); + +    if( GL_FALSE == next_ins(pAsm) ) +    { +        return GL_FALSE; +    } + +    // MOV     dst.z,    tmp2.x + +    pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + +    if( GL_FALSE == assemble_dst(pAsm) ) +    { +        return GL_FALSE; +    } + +    pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0; + +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); +    pAsm->S[0].src.rtype = DST_REG_TEMPORARY; +    pAsm->S[0].src.reg   = tmp2; + +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); +    noneg_PVSSRC(&(pAsm->S[0].src)); + +    if( GL_FALSE == next_ins(pAsm) ) +    { +        return GL_FALSE; +    } + +    // MOV     dst.w     1.0 + +    pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + +    if( GL_FALSE == assemble_dst(pAsm) ) +    { +        return GL_FALSE; +    } + +    pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0; + +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); +    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; +    pAsm->S[0].src.reg   = tmp1; + +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1); +    noneg_PVSSRC(&(pAsm->S[0].src)); + +    if( GL_FALSE == next_ins(pAsm) ) +    { +        return GL_FALSE; +    } + +    return GL_TRUE; +} +  GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)   {      int tmp, ii; @@ -4166,10 +4377,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,                  return GL_FALSE;              break;            case OPCODE_LOG:  -            radeon_error("Not yet implemented instruction 
OPCODE_LOG \n"); -            //if ( GL_FALSE == assemble_BAD("LOG") )  +            if ( GL_FALSE == assemble_LOG(pR700AsmCode) )                   return GL_FALSE; -            break; // approx of LG2 +            break;          case OPCODE_MAD:               if ( GL_FALSE == assemble_MAD(pR700AsmCode) )  diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 317feb1b7d..c66db502a1 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -484,6 +484,7 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm);  GLboolean assemble_KIL(r700_AssemblerBase *pAsm);  GLboolean assemble_LG2(r700_AssemblerBase *pAsm);  GLboolean assemble_LRP(r700_AssemblerBase *pAsm); +GLboolean assemble_LOG(r700_AssemblerBase *pAsm);  GLboolean assemble_MAD(r700_AssemblerBase *pAsm);  GLboolean assemble_LIT(r700_AssemblerBase *pAsm);  GLboolean assemble_MAX(r700_AssemblerBase *pAsm); | 
