diff options
Diffstat (limited to 'src/mesa')
| -rw-r--r-- | src/mesa/x86/mmx_blend.S | 521 | ||||
| -rw-r--r-- | src/mesa/x86/mmx_blendtmp.h | 113 | 
2 files changed, 334 insertions, 300 deletions
| diff --git a/src/mesa/x86/mmx_blend.S b/src/mesa/x86/mmx_blend.S index e679aa7bc7..f80cbf6c45 100644 --- a/src/mesa/x86/mmx_blend.S +++ b/src/mesa/x86/mmx_blend.S @@ -4,8 +4,10 @@  #include "matypes.h" -/* - * make the following approximation to the division (Sree) + +/* integer multiplication - alpha plus one + * + * makes the following approximation to the division (Sree)   *   *   rgb*a/255 ~= (rgb*(a+1)) >> 256   * @@ -13,12 +15,24 @@   *   *   0*0 = 0 and 255*255 = 255   * - * note this one should be used alone + * note that MX1 is a register with 0xffffffffffffffff constant which can be easily obtained making + * + *   PCMPEQW    ( MX1, MX1 )   */ -#define GMBT_ALPHA_PLUS_ONE	0 +#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \ +    PSUBW      ( MX1, MA1 )			/*   a1 + 1  |   a1 + 1  |   a1 + 1  |   a1 + 1  */	;\ +TWO(PSUBW      ( MX1, MA2 ))			/*   a2 + 1  |   a2 + 1  |   a2 + 1  |   a2 + 1  */	;\ +													;\ +    PMULLW     ( MP1, MA1 )			/*                  t1 = p1*a1                   */	;\ +TWO(PMULLW     ( MP2, MA2 ))			/*                  t2 = p2*a2                   */	;\ +													;\ +    PSRLW      ( CONST(8), MA1 )		/*               t1 >> 8 ~= t1/255               */	;\ +TWO(PSRLW      ( CONST(8), MA2 ))		/*               t2 >> 8 ~= t2/255               */	 -/* - * take the geometric series approximation to the division + +/* integer multiplication - geometric series + * + * takes the geometric series approximation to the division   *   *   t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..   * @@ -29,333 +43,240 @@   * note that just by itself it doesn't satisfies the OpenGL criteria, as 255*255 = 254,    * so the special case a = 255 must be accounted or roundoff must be used   */ -#define GMBT_GEOMETRIC_SERIES	1 +#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \ +    PMULLW     ( MP1, MA1 )			/*                  t1 = p1*a1                   */	;\ +TWO(PMULLW     ( MP2, MA2 ))			/*                  t2 = p2*a2                   */	;\ +													;\ +    MOVQ       ( MA1, MP1 )										;\ +TWO(MOVQ       ( MA2, MP2 ))										;\ +													;\ +    PSRLW      ( CONST(8), MP1 )		/*                    t1 >> 8                    */	;\ +TWO(PSRLW      ( CONST(8), MP2 ))		/*                    t2 >> 8                    */	;\ +													;\ +    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\ +TWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\ +													;\ +    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\ +TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */ -/* + +/* integer multiplication - geometric series plus rounding + *   * when using a geometric series division instead of truncating the result    * use roundoff in the approximation (Jim Blinn)   *   *   t = rgb*a + 0x80   *   * achieving the exact results + * + * note that M80 is register with the 0x0080008000800080 constant + */ +#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \ +    PMULLW     ( MP1, MA1 )			/*                  t1 = p1*a1                   */	;\ +TWO(PMULLW     ( MP2, MA2 ))			/*                  t2 = p2*a2                   */	;\ +													;\ +    PADDW      ( M80, MA1 )			/*                 t1 += 0x80                    */	;\ +TWO(PADDW      ( M80, MA2 ))			/*                 t2 += 0x80                    */	;\ +													;\ +    MOVQ       ( MA1, MP1 )										;\ +TWO(MOVQ       ( MA2, MP2 ))										;\ +													;\ +    PSRLW      ( CONST(8), MP1 )		/*                    t1 >> 8                    */	;\ +TWO(PSRLW      ( CONST(8), MP2 ))		/*                    t2 >> 8                    */	;\ +													;\ +    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\ +TWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\ +													;\ +    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\ +TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */ + + +/* linear interpolation - geometric series  + */ +#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \ +    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */	;\ +TWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */	;\ +													;\ +    PSLLW      ( CONST(8), MQ1 )		/*                    q1 << 8                    */	;\ +TWO(PSLLW      ( CONST(8), MQ2 ))		/*                    q2 << 8                    */	;\ +													;\ +    PMULLW     ( MP1, MA1 )			/*              t1 = (q1 - p1)*pa1               */	;\ +TWO(PMULLW     ( MP2, MA2 ))			/*              t2 = (q2 - p2)*pa2               */	;\ +													;\ +    MOVQ       ( MA1, MP1 )										;\ +TWO(MOVQ       ( MA2, MP2 ))										;\ +													;\ +    PSRLW      ( CONST(8), MP1 )		/*                    t1 >> 8                    */	;\ +TWO(PSRLW      ( CONST(8), MP2 ))		/*                    t2 >> 8                    */	;\ +													;\ +    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\ +TWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\ +													;\ +    PADDW      ( MQ1, MA1 )			/*              (t1/255 + q1) << 8               */	;\ +TWO(PADDW      ( MQ2, MA2 ))			/*              (t2/255 + q2) << 8               */	;\ +													;\ +    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\ +TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */ + + +/* linear interpolation - geometric series with roundoff + * + * this is a generalization of Blinn's formula to signed arithmetic + * + * note that M80 is a register with the 0x0080008000800080 constant   */ -#define GMBT_ROUNDOFF		0 +#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \ +    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */	;\ +TWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */	;\ +													;\ +    PSLLW      ( CONST(8), MQ1 )		/*                    q1 << 8                    */	;\ +TWO(PSLLW      ( CONST(8), MQ2 ))		/*                    q2 << 8                    */	;\ +													;\ +    PMULLW     ( MP1, MA1 )			/*              t1 = (q1 - p1)*pa1               */	;\ +TWO(PMULLW     ( MP2, MA2 ))			/*              t2 = (q2 - p2)*pa2               */	;\ +													;\ +    PSRLW      ( CONST(15), MP1 )		/*                 q1 > p1 ? 1 : 0               */	;\ +TWO(PSRLW      ( CONST(15), MP2 ))		/*                 q2 > q2 ? 1 : 0               */	;\ +													;\ +    PSLLW      ( CONST(8), MP1 )		/*             q1 > p1 ? 0x100 : 0               */	;\ +TWO(PSLLW      ( CONST(8), MP2 ))		/*             q2 > q2 ? 0x100 : 0               */	;\ +													;\ +    PSUBW      ( MP1, MA1 )			/*                  t1 -=? 0x100                 */	;\ +TWO(PSUBW      ( MP2, MA2 ))			/*                  t2 -=? 0x100                 */	;\ + 													;\ +    PADDW      ( M80, MA1 )			/*                 t1 += 0x80                    */	;\ +TWO(PADDW      ( M80, MA2 ))			/*                 t2 += 0x80                    */	;\ +													;\ +    MOVQ       ( MA1, MP1 )										;\ +TWO(MOVQ       ( MA2, MP2 ))										;\ +													;\ +    PSRLW      ( CONST(8), MP1 )		/*                    t1 >> 8                    */	;\ +TWO(PSRLW      ( CONST(8), MP2 ))		/*                    t2 >> 8                    */	;\ +													;\ +    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\ +TWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\ +													;\ +    PADDW      ( MQ1, MA1 )			/*              (t1/255 + q1) << 8               */	;\ +TWO(PADDW      ( MQ2, MA2 ))			/*              (t2/255 + q2) << 8               */	;\ +													;\ +    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\ +TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */ + -/* instead of the roundoff this adds a small correction to satisfy the OpenGL criteria +/* linear interpolation - geometric series with correction + * + * instead of the roundoff this adds a small correction to satisfy the OpenGL criteria   *   *   t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8   *   * note that although is faster than rounding off it doesn't give always the exact results   */ -#define GMBT_GEOMETRIC_CORRECTION	1 +#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \ +    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */	;\ +TWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */	;\ +													;\ +    PSLLW      ( CONST(8), MQ1 )		/*                    q1 << 8                    */	;\ +TWO(PSLLW      ( CONST(8), MQ2 ))		/*                    q2 << 8                    */	;\ +													;\ +    PMULLW     ( MP1, MA1 )			/*              t1 = (q1 - p1)*pa1               */	;\ +TWO(PMULLW     ( MP2, MA2 ))			/*              t2 = (q2 - p2)*pa2               */	;\ +													;\ +    MOVQ       ( MA1, MP1 )										;\ +TWO(MOVQ       ( MA2, MP2 ))										;\ +													;\ +    PSRLW      ( CONST(8), MP1 )		/*                    t1 >> 8                    */	;\ +TWO(PSRLW      ( CONST(8), MP2 ))		/*                    t2 >> 8                    */	;\ +													;\ +    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\ +TWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\ +													;\ +    PSRLW      ( CONST(7), MP1 )		/*                    t1 >> 15                   */	;\ +TWO(PSRLW      ( CONST(7), MP2 ))		/*                    t2 >> 15                   */	;\ +													;\ +    PADDW      ( MP1, MA1 )			/*  t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8  */	;\ +TWO(PADDW      ( MP2, MA2 ))			/*  t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8  */	;\ +													;\ +    PADDW      ( MQ1, MA1 )			/*              (t1/255 + q1) << 8               */	;\ +TWO(PADDW      ( MQ2, MA2 ))			/*              (t2/255 + q2) << 8               */	;\ +													;\ +    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\ +TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */ + -#if GMBT_ROUNDOFF +/* common blending initialization code + */ +#if 0	/* rounding not used */      SEG_DATA  ALIGNDATA8  const_80:  	D_LONG 0x00800080, 0x00800080 -#endif  - -   SEG_TEXT - -ALIGNTEXT16 -GLOBL GLNAME(_mesa_mmx_blend_transparency) - -/* - * void blend_transparency( GLcontext *ctx, - *                          GLuint n,  - *                          const GLubyte mask[], - *                          GLchan rgba[][4],  - *                          CONST GLchan dest[][4] ) - *  - * Common transparency blending mode. - */ -GLNAME( _mesa_mmx_blend_transparency ): - -    PUSH_L     ( EBP ) -    MOV_L      ( ESP, EBP ) -    PUSH_L     ( ESI ) -    PUSH_L     ( EDI ) -    PUSH_L     ( EBX ) - -    MOV_L      ( REGOFF(12, EBP), ECX )		/* n */ -    CMP_L      ( CONST(0), ECX) -    JE         ( LLBL (GMBT_return) ) - -    MOV_L      ( REGOFF(16, EBP), EBX )		/* mask */ -    MOV_L      ( REGOFF(20, EBP), EDI )         /* rgba */ -    MOV_L      ( REGOFF(24, EBP), ESI )         /* dest */ -     -    TEST_L     ( CONST(4), EDI )		/* align rgba on an 8-byte boundary */ -    JZ         ( LLBL (GMBT_align_end) ) -    CMP_B      ( CONST(0), REGIND(EBX) )	/* *mask == 0 */ -    JE         ( LLBL (GMBT_align_continue) ) +#define GMB_INIT( M00, M80 ) \ +    PXOR       ( M00, M00 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */ +    MOVQ       ( CONTENT(const_80), M80 )	/*   0xffff  |   0xffff  |   0xffff  |   0xffff  */ -    PXOR       ( MM0, MM0 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */ +#else -    MOVD       ( REGIND(ESI), MM1 )		/*     |     |     |     | qa1 | qb1 | qg1 | qr1 */ -    MOVD       ( REGIND(EDI), MM2 )		/*     |     |     |     | pa1 | pb1 | pg1 | pr1 */ +#define GMB_INIT( M00 ) \ +    PXOR       ( M00, M00 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */ -    PUNPCKLBW  ( MM0, MM1 )			/*    qa1    |    qb1    |    qg1    |    qr1    */ -    PUNPCKLBW  ( MM0, MM2 )			/*    pa1    |    pb1    |    pg1    |    pr1    */ - -    MOVQ       ( MM2, MM3 ) - -    PUNPCKHWD  ( MM3, MM3 )			/*    pa1    |    pa1    |           |           */ -    PUNPCKHDQ  ( MM3, MM3 )                     /*    pa1    |    pa1    |    pa1    |    pa1    */ - -#if GMBT_ALPHA_PLUS_ONE -    PCMPEQW    ( MM4, MM4 )			/*   0xffff  |   0xffff  |   0xffff  |   0xffff  */ - -    PSUBW      ( MM4, MM3 )                     /*   pa1 + 1 |   pa1 + 1 |   pa1 + 1 |   pa1 + 1 */ -#endif - -    PSUBW      ( MM1, MM2 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ - -    PSLLW      ( CONST(8), MM1 )		/*                    q1 << 8                    */ - -#if GMBT_ROUNDOFF -    MOVQ       ( MM2, MM4 ) -#endif - -    PMULLW     ( MM3, MM2 )			/*              t1 = (q1 - p1)*pa1               */ - -#if GMBT_ROUNDOFF -    PSRLW      ( CONST(15), MM4 )		/*                 q1 > p1 ? 1 : 0               */ - -    PSLLW      ( CONST(8), MM4 )		/*             q1 > p1 ? 0x100 : 0               */ - -    PSUBW      ( MM4, MM2 )                     /*                  t1 -=? 0x100                 */ -#endif - -#if GMBT_ROUNDOFF -    MOVQ       ( CONTENT(const_80), MM4 ) - -    PADDW      ( MM4, MM2 )                     /*                 t1 += 0x80                    */  #endif -#if GMBT_GEOMETRIC_SERIES -    MOVQ       ( MM2, MM3 ) - -    PSRLW      ( CONST(8), MM3 )		/*                    t1 >> 8                    */ - -    PADDW      ( MM3, MM2 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */ - -#if GMBT_GEOMETRIC_CORRECTION  -    PSRLW      ( CONST(7), MM3 )		/*                    t1 >> 15                   */ - -    PADDW      ( MM3, MM2 )			/*  t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8  */ -#endif -#endif - -    PADDW      ( MM1, MM2 )			/*              (t1/255 + q1) << 8               */ - -    PSRLW      ( CONST(8), MM2 )		/*    sa1    |    sb1    |    sg1    |    sr1    */ -     -    PACKUSWB   ( MM0, MM2 )			/*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ -    MOVD       ( MM2, REGIND(EDI) ) - -LLBL (GMBT_align_continue): - -    DEC_L      ( ECX )				/* n -= 1 */ -    INC_L      ( EBX )		                /* mask += 1 */ -    ADD_L      ( CONST(4), EDI )		/* rgba += 1 */ -    ADD_L      ( CONST(4), ESI )		/* dest += 1 */  - -LLBL (GMBT_align_end): - -    CMP_L      ( CONST(2), ECX) -    JB         ( LLBL (GMBT_loop_end) ) - -ALIGNTEXT16 -LLBL (GMBT_loop_begin): - -    CMP_W      ( CONST(0), REGIND(EBX) )	/* *mask == 0 && *(mask + 1) == 0 */ -    JE         ( LLBL (GMBT_loop_continue) ) - -    /* NOTE: the instruction pairing when multiple pipelines are available must be checked */ - -    PXOR       ( MM0, MM0 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */ - -    MOVQ       ( REGIND(ESI), MM7 )		/* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */ -    MOVQ       ( REGIND(EDI), MM6 )		/* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ - -    MOVQ       ( MM7, MM1 ) -    MOVQ       ( MM6, MM2 ) - -    PUNPCKLBW  ( MM0, MM1 )			/*    qa1    |    qb1    |    qg1    |    qr1    */ -    PUNPCKHBW  ( MM0, MM7 )                     /*    qa2    |    qb2    |    qg2    |    qr2    */ -    PUNPCKLBW  ( MM0, MM2 )			/*    pa1    |    pb1    |    pg1    |    pr1    */ -    PUNPCKHBW  ( MM0, MM6 )                     /*    pa2    |    pb2    |    pg2    |    pr2    */ - -    MOVQ       ( MM2, MM3 ) -    MOVQ       ( MM6, MM5 ) - -    PUNPCKHWD  ( MM3, MM3 )			/*    pa1    |    pa1    |           |           */ -    PUNPCKHWD  ( MM5, MM5 )			/*    pa2    |    pa2    |           |           */ -    PUNPCKHDQ  ( MM3, MM3 )                     /*    pa1    |    pa1    |    pa1    |    pa1    */ -    PUNPCKHDQ  ( MM5, MM5 )                     /*    pa2    |    pa2    |    pa2    |    pa2    */ - -#if GMBT_ALPHA_PLUS_ONE -    PCMPEQW    ( MM4, MM4 )			/*   0xffff  |   0xffff  |   0xffff  |   0xffff  */ - -    PSUBW      ( MM4, MM3 )                     /*   pa1 + 1 |   pa1 + 1 |   pa1 + 1 |   pa1 + 1 */ -    PSUBW      ( MM4, MM5 )                     /*   pa2 + 1 |   pa2 + 1 |   pa2 + 1 |   pa2 + 1 */ -#endif - -    PSUBW      ( MM1, MM2 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ -    PSUBW      ( MM7, MM6 )                     /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ - -    PSLLW      ( CONST(8), MM1 )		/*                    q1 << 8                    */ -    PSLLW      ( CONST(8), MM7 )		/*                    q2 << 8                    */ - -#if GMBT_ROUNDOFF -    MOVQ       ( MM2, MM0 ) -    MOVQ       ( MM6, MM4 ) -#endif - -    PMULLW     ( MM3, MM2 )			/*              t1 = (q1 - p1)*pa1               */ -    PMULLW     ( MM5, MM6 )			/*              t2 = (q2 - p2)*pa2               */ - -#if GMBT_ROUNDOFF -    PSRLW      ( CONST(15), MM0 )		/*                 q1 > p1 ? 1 : 0               */ -    PSRLW      ( CONST(15), MM4 )		/*                 q2 > q2 ? 1 : 0               */ - -    PSLLW      ( CONST(8), MM0 )		/*             q1 > p1 ? 0x100 : 0               */ -    PSLLW      ( CONST(8), MM4 )		/*             q2 > q2 ? 0x100 : 0               */ - -    PSUBW      ( MM0, MM2 )                     /*                  t1 -=? 0x100                 */ -    PSUBW      ( MM4, MM7 )                     /*                  t2 -=? 0x100                 */  -#endif - -#if GMBT_ROUNDOFF -    MOVQ       ( CONTENT(const_80), MM4 ) - -    PADDW      ( MM4, MM2 )                     /*                 t1 += 0x80                    */ -    PADDW      ( MM4, MM6 )                     /*                 t2 += 0x80                    */ -#endif - -#if GMBT_GEOMETRIC_SERIES -    MOVQ       ( MM2, MM3 ) -    MOVQ       ( MM6, MM5 ) - -    PSRLW      ( CONST(8), MM3 )		/*                    t1 >> 8                    */ -    PSRLW      ( CONST(8), MM5 )		/*                    t2 >> 8                    */ - -    PADDW      ( MM3, MM2 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */ -    PADDW      ( MM5, MM6 )			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */ - -#if GMBT_GEOMETRIC_CORRECTION  -    PSRLW      ( CONST(7), MM3 )		/*                    t1 >> 15                   */ -    PSRLW      ( CONST(7), MM5 )		/*                    t2 >> 15                   */ - -    PADDW      ( MM3, MM2 )			/*  t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8  */ -    PADDW      ( MM5, MM6 )			/*  t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8  */ -#endif -#endif - -    PADDW      ( MM1, MM2 )			/*              (t1/255 + q1) << 8               */ -    PADDW      ( MM7, MM6 )			/*              (t2/255 + q2) << 8               */ - -    PSRLW      ( CONST(8), MM2 )		/*    sa1    |    sb1    |    sg1    |    sr1    */ -    PSRLW      ( CONST(8), MM6 )		/*    sa2    |    sb2    |    sg2    |    sr2    */ -     -    PACKUSWB   ( MM6, MM2 )			/* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ -    MOVQ       ( MM2, REGIND(EDI) ) - -LLBL (GMBT_loop_continue): - -    DEC_L      ( ECX ) -    DEC_L      ( ECX )				/* n -= 2 */ -    ADD_L      ( CONST(2), EBX )		/* mask += 2 */ -    ADD_L      ( CONST(8), EDI )		/* rgba += 2 */ -    ADD_L      ( CONST(8), ESI )		/* dest += 2 */  -    CMP_L      ( CONST(2), ECX ) -    JAE        ( LLBL (GMBT_loop_begin) ) - -LLBL (GMBT_loop_end): - -    CMP_L      ( CONST(1), ECX ) -    JB         ( LLBL (GMBT_done) ) - -    CMP_B      ( CONST(0), REGIND(EBX) )	/* *mask == 0 */ -    JE         ( LLBL (GMBT_done) ) - -    PXOR       ( MM0, MM0 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */ - -    MOVD       ( REGIND(ESI), MM1 )		/*     |     |     |     | qa1 | qb1 | qg1 | qr1 */ -    MOVD       ( REGIND(EDI), MM2 )		/*     |     |     |     | pa1 | pb1 | pg1 | pr1 */ - -    PUNPCKLBW  ( MM0, MM1 )			/*    qa1    |    qb1    |    qg1    |    qr1    */ -    PUNPCKLBW  ( MM0, MM2 )			/*    pa1    |    pb1    |    pg1    |    pr1    */ - -    MOVQ       ( MM2, MM3 ) - -    PUNPCKHWD  ( MM3, MM3 )			/*    pa1    |    pa1    |           |           */ -    PUNPCKHDQ  ( MM3, MM3 )                     /*    pa1    |    pa1    |    pa1    |    pa1    */ - -#if GMBT_ALPHA_PLUS_ONE -    PCMPEQW    ( MM4, MM4 )			/*   0xffff  |   0xffff  |   0xffff  |   0xffff  */ - -    PSUBW      ( MM4, MM3 )                     /*   pa1 + 1 |   pa1 + 1 |   pa1 + 1 |   pa1 + 1 */ -#endif - -    PSUBW      ( MM1, MM2 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ - -    PSLLW      ( CONST(8), MM1 )		/*                    q1 << 8                    */ - -#if GMBT_ROUNDOFF -    MOVQ       ( MM2, MM4 ) -#endif - -    PMULLW     ( MM3, MM2 )			/*              t1 = (q1 - p1)*pa1               */ - -#if GMBT_ROUNDOFF -    PSRLW      ( CONST(15), MM4 )		/*                 q1 > p1 ? 1 : 0               */ - -    PSLLW      ( CONST(8), MM4 )		/*             q1 > p1 ? 0x100 : 0               */ - -    PSUBW      ( MM4, MM2 )                     /*                  t1 -=? 0x100                 */ -#endif - -#if GMBT_ROUNDOFF -    MOVQ       ( CONTENT(const_80), MM4 ) - -    PADDW      ( MM4, MM2 )                     /*                 t1 += 0x80                    */ -#endif - -#if GMBT_GEOMETRIC_SERIES -    MOVQ       ( MM2, MM3 ) +/* common blending loading code + * + * note that M00 is a register with 0x0000000000000000 constant which can be easily obtained making + * + *   PXOR      ( M00, M00 ) + */ +#define GMB_LOAD(rgba, dest, MP1, MQ1, MA1, MP2, MQ2, MA2, M00) \ +ONE(MOVD       ( REGIND(rgba), MP1 ))		/*     |     |     |     | qa1 | qb1 | qg1 | qr1 */	;\ +ONE(MOVD       ( REGIND(dest), MQ1 ))		/*     |     |     |     | pa1 | pb1 | pg1 | pr1 */	;\ +													;\ +TWO(MOVQ       ( REGIND(rgba), MP1 ))		/* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */	;\ +TWO(MOVQ       ( REGIND(dest), MQ1 ))		/* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */	;\ +													;\ +TWO(MOVQ       ( MP1, MP2 ))										;\ +TWO(MOVQ       ( MQ1, MQ2 ))										;\ +													;\ +    PUNPCKLBW  ( M00, MQ1 )			/*    qa1    |    qb1    |    qg1    |    qr1    */	;\ +TWO(PUNPCKHBW  ( M00, MQ2 ))                    /*    qa2    |    qb2    |    qg2    |    qr2    */	;\ +    PUNPCKLBW  ( M00, MP1 )			/*    pa1    |    pb1    |    pg1    |    pr1    */	;\ +TWO(PUNPCKHBW  ( M00, MP2 ))                    /*    pa2    |    pb2    |    pg2    |    pr2    */	;\ +													;\ +    MOVQ       ( MP1, MA1 )										;\ +TWO(MOVQ       ( MP2, MA2 ))										;\ +													;\ +    PUNPCKHWD  ( MA1, MA1 )			/*    pa1    |    pa1    |           |           */	;\ +TWO(PUNPCKHWD  ( MA2, MA2 ))			/*    pa2    |    pa2    |           |           */	;\ +    PUNPCKHDQ  ( MA1, MA1 )                     /*    pa1    |    pa1    |    pa1    |    pa1    */	;\ +TWO(PUNPCKHDQ  ( MA2, MA2 ))                    /*    pa2    |    pa2    |    pa2    |    pa2    */ -    PSRLW      ( CONST(8), MM3 )		/*                    t1 >> 8                    */ +  +/* common blending storing code + */ +#define GMB_STORE(rgba, MA1, MA2) \ +    PACKUSWB   ( MA2, MA1 )			/* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */	;\ +													;\ +ONE(MOVD       ( MA1, REGIND(rgba) ))									;\ +TWO(MOVQ       ( MA1, REGIND(rgba) )) -    PADDW      ( MM3, MM2 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */ -#if GMBT_GEOMETRIC_CORRECTION  -    PSRLW      ( CONST(7), MM3 )		/*                    t1 >> 15                   */ +   SEG_TEXT -    PADDW      ( MM3, MM2 )			/*  t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8  */ -#endif -#endif -    PADDW      ( MM1, MM2 )			/*              (t1/255 + q1) << 8               */ +/* common transparency blending mode + */ -    PSRLW      ( CONST(8), MM2 )		/*    sa1    |    sb1    |    sg1    |    sr1    */ -     -    PACKUSWB   ( MM0, MM2 )			/*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ -    MOVD       ( MM2, REGIND(EDI) ) +#define TAG(x) x##_transparency -LLBL (GMBT_done): +#define INIT \ +	GMB_INIT( MM0 ) -    EMMS +#define MAIN \ +	GMB_LOAD( EDI, ESI, MM1, MM2, MM3, MM4, MM5, MM6, MM0)						;\ +	GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 )							;\ +	GMB_STORE( EDI, MM3, MM6 ) -LLBL (GMBT_return): +#include "mmx_blendtmp.h" -    POP_L      ( EBX ) -    POP_L      ( EDI ) -    POP_L      ( ESI ) -    MOV_L      ( EBP, ESP ) -    POP_L      ( EBP ) -    RET diff --git a/src/mesa/x86/mmx_blendtmp.h b/src/mesa/x86/mmx_blendtmp.h new file mode 100644 index 0000000000..395436ba01 --- /dev/null +++ b/src/mesa/x86/mmx_blendtmp.h @@ -0,0 +1,113 @@ +/* + * Written by José Fonseca <j_r_fonseca@yahoo.co.uk> + */ + + +/* + * void _mesa_mmx_blend( GLcontext *ctx, + *                       GLuint n,  + *                       const GLubyte mask[], + *                       GLchan rgba[][4],  + *                       CONST GLchan dest[][4] ) + *  + */ +ALIGNTEXT16 +GLOBL GLNAME( TAG(_mesa_mmx_blend) ) + +GLNAME( TAG(_mesa_mmx_blend) ): + +    PUSH_L     ( EBP ) +    MOV_L      ( ESP, EBP ) +    PUSH_L     ( ESI ) +    PUSH_L     ( EDI ) +    PUSH_L     ( EBX ) + +    MOV_L      ( REGOFF(12, EBP), ECX )		/* n */ +    CMP_L      ( CONST(0), ECX) +    JE         ( LLBL ( TAG(GMB_return) ) ) + +    MOV_L      ( REGOFF(16, EBP), EBX )		/* mask */ +    MOV_L      ( REGOFF(20, EBP), EDI )         /* rgba */ +    MOV_L      ( REGOFF(24, EBP), ESI )         /* dest */ + +    INIT +     +    TEST_L     ( CONST(4), EDI )		/* align rgba on an 8-byte boundary */ +    JZ         ( LLBL ( TAG(GMB_align_end) ) ) + +    CMP_B      ( CONST(0), REGIND(EBX) )	/* *mask == 0 */ +    JE         ( LLBL ( TAG(GMB_align_continue) ) ) + +    /* runin */ +#define ONE(x)	x +#define TWO(x)   +    MAIN +#undef ONE +#undef TWO + +LLBL ( TAG(GMB_align_continue) ): + +    DEC_L      ( ECX )				/* n -= 1 */ +    INC_L      ( EBX )		                /* mask += 1 */ +    ADD_L      ( CONST(4), EDI )		/* rgba += 1 */ +    ADD_L      ( CONST(4), ESI )		/* dest += 1 */  + +LLBL ( TAG(GMB_align_end) ): + +    CMP_L      ( CONST(2), ECX) +    JB         ( LLBL ( TAG(GMB_loop_end) ) ) + +ALIGNTEXT16 +LLBL ( TAG(GMB_loop_begin) ): + +    CMP_W      ( CONST(0), REGIND(EBX) )	/* *mask == 0 && *(mask + 1) == 0 */ +    JE         ( LLBL ( TAG(GMB_loop_continue) ) ) + +    /* main loop */ +#define ONE(x) +#define TWO(x)	x +    MAIN +#undef ONE +#undef TWO + +LLBL ( TAG(GMB_loop_continue) ): + +    DEC_L      ( ECX ) +    DEC_L      ( ECX )				/* n -= 2 */ +    ADD_L      ( CONST(2), EBX )		/* mask += 2 */ +    ADD_L      ( CONST(8), EDI )		/* rgba += 2 */ +    ADD_L      ( CONST(8), ESI )		/* dest += 2 */  +    CMP_L      ( CONST(2), ECX ) +    JAE        ( LLBL ( TAG(GMB_loop_begin) ) ) + +LLBL ( TAG(GMB_loop_end) ): + +    CMP_L      ( CONST(1), ECX ) +    JB         ( LLBL ( TAG(GMB_done) ) ) + +    CMP_B      ( CONST(0), REGIND(EBX) )	/* *mask == 0 */ +    JE         ( LLBL ( TAG(GMB_done) ) ) + +    /* runout */ +#define ONE(x)	x +#define TWO(x) +    MAIN +#undef ONE +#undef TWO + +LLBL ( TAG(GMB_done) ): + +    EMMS + +LLBL ( TAG(GMB_return) ): + +    POP_L      ( EBX ) +    POP_L      ( EDI ) +    POP_L      ( ESI ) +    MOV_L      ( EBP, ESP ) +    POP_L      ( EBP ) +    RET + +#undef TAG +#undef INIT +#undef MAIN | 
