From 9c16fcc617b8c5c4db825ed290f994e535de8c65 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 1 Mar 2011 11:57:51 +1000
Subject: rgtc: share the compressor code between signed/unsigned

No idea why I didn't do it like this the first time, but share the
encoder between the signed and unsigned variants the way other parts
of Mesa do: move it into a header with a _tmp.h suffix and include it
twice, with #defines selecting the types.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/main/texcompress_rgtc.c     | 723 ++---------------------------------
 src/mesa/main/texcompress_rgtc_tmp.h | 376 ++++++++++++++++++
 2 files changed, 403 insertions(+), 696 deletions(-)
 create mode 100644 src/mesa/main/texcompress_rgtc_tmp.h

diff --git a/src/mesa/main/texcompress_rgtc.c b/src/mesa/main/texcompress_rgtc.c
index 1a01755f14..2f3a0f214a 100644
--- a/src/mesa/main/texcompress_rgtc.c
+++ b/src/mesa/main/texcompress_rgtc.c
@@ -46,9 +46,9 @@
 
 #define RGTC_DEBUG 0
 
-static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4],
-			     GLint numxpixels, GLint numypixels);
-static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4],
+static void unsigned_encode_rgtc_chan(GLubyte *blkaddr, GLubyte srccolors[4][4],
+					GLint numxpixels, GLint numypixels);
+static void signed_encode_rgtc_chan(GLbyte *blkaddr, GLbyte srccolors[4][4],
 			     GLint numxpixels, GLint numypixels);
 
 static void extractsrc_u( GLubyte srcpixels[4][4], const GLchan *srcaddr,
@@ -123,7 +123,7 @@ _mesa_texstore_red_rgtc1(TEXSTORE_PARAMS)
 	 if (srcWidth > i + 3) numxpixels = 4;
 	 else numxpixels = srcWidth - i;
 	 extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1);
-	 encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+	 unsigned_encode_rgtc_chan(blkaddr, srcpixels, numxpixels, numypixels);
 	 srcaddr += numxpixels;
 	 blkaddr += 8;
       }
@@ -177,7 +177,7 @@ _mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS)
 	 if (srcWidth > i + 3) numxpixels = 4;
 	 else numxpixels = srcWidth - i;
 	 extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1);
-	 encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+	 signed_encode_rgtc_chan(blkaddr, srcpixels, numxpixels, numypixels);
 	 srcaddr += numxpixels;
 	 blkaddr += 8;
       }
@@ -232,11 +232,11 @@ _mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
 	 if (srcWidth > i + 3) numxpixels = 4;
 	 else numxpixels = srcWidth - i;
 	 extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2);
-	 encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+	 unsigned_encode_rgtc_chan(blkaddr, srcpixels, numxpixels, numypixels);
 
 	 blkaddr += 8;
 	 extractsrc_u(srcpixels, (GLchan *)srcaddr + 1, srcWidth, numxpixels, numypixels, 2);
-	 encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+	 unsigned_encode_rgtc_chan(blkaddr, srcpixels, numxpixels, numypixels);
 
 	 blkaddr += 8;
 
@@ -294,11 +294,11 @@ _mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS)
 	 else numxpixels = srcWidth - i;
 
 	 extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2);
-	 encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+	 signed_encode_rgtc_chan(blkaddr, srcpixels, numxpixels, numypixels);
 	 blkaddr += 8;
 
 	 extractsrc_s(srcpixels, srcaddr + 1, srcWidth, numxpixels, numypixels, 2);
-	 encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+	 signed_encode_rgtc_chan(blkaddr, srcpixels, numxpixels, numypixels);
 	 blkaddr += 8;
 
 	 srcaddr += numxpixels * 2;
@@ -427,696 +427,27 @@ _mesa_fetch_texel_2d_f_signed_rg_rgtc2(const struct gl_texture_image *texImage,
    texel[ACOMP] = 1.0;
 }
 
-static void write_rgtc_encoded_channel(GLubyte *blkaddr,
-				       GLubyte alphabase1,
-				       GLubyte alphabase2,
-				       GLubyte alphaenc[16])
-{
-   *blkaddr++ = alphabase1;
-   *blkaddr++ = alphabase2;
-   *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
-   *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
-   *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
-   *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
-   *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
-   *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
-}
-
-static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4],
-			     GLint numxpixels, GLint numypixels)
-{
-   GLubyte alphabase[2], alphause[2];
-   GLshort alphatest[2] = { 0 };
-   GLuint alphablockerror1, alphablockerror2, alphablockerror3;
-   GLubyte i, j, aindex, acutValues[7];
-   GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
-   GLboolean alphaabsmin = GL_FALSE;
-   GLboolean alphaabsmax = GL_FALSE;
-   GLshort alphadist;
-
-   /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
-   alphabase[0] = 0xff; alphabase[1] = 0x0;
-   for (j = 0; j < numypixels; j++) {
-      for (i = 0; i < numxpixels; i++) {
-         if (srccolors[j][i] == 0)
-            alphaabsmin = GL_TRUE;
-         else if (srccolors[j][i] == 255)
-            alphaabsmax = GL_TRUE;
-         else {
-            if (srccolors[j][i] > alphabase[1])
-               alphabase[1] = srccolors[j][i];
-            if (srccolors[j][i] < alphabase[0])
-               alphabase[0] = srccolors[j][i];
-         }
-      }
-   }
-
-
-   if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
-      /* shortcut here since it is a very common case (and also avoids later problems) */
-      /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
-      /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
-
-      *blkaddr++ = srccolors[0][0];
-      blkaddr++;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-#if RGTC_DEBUG
-      fprintf(stderr, "enc0 used\n");
-#endif
-      return;
-   }
-
-   /* find best encoding for alpha0 > alpha1 */
-   /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
-   alphablockerror1 = 0x0;
-   alphablockerror2 = 0xffffffff;
-   alphablockerror3 = 0xffffffff;
-   if (alphaabsmin) alphause[0] = 0;
-   else alphause[0] = alphabase[0];
-   if (alphaabsmax) alphause[1] = 255;
-   else alphause[1] = alphabase[1];
-   /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
-   for (aindex = 0; aindex < 7; aindex++) {
-      /* don't forget here is always rounded down */
-      acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
-   }
-
-   for (j = 0; j < numypixels; j++) {
-      for (i = 0; i < numxpixels; i++) {
-         /* maybe it's overkill to have the most complicated calculation just for the error
-            calculation which we only need to figure out if encoding1 or encoding2 is better... */
-         if (srccolors[j][i] > acutValues[0]) {
-            alphaenc1[4*j + i] = 0;
-            alphadist = srccolors[j][i] - alphause[1];
-         }
-         else if (srccolors[j][i] > acutValues[1]) {
-            alphaenc1[4*j + i] = 2;
-            alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[2]) {
-            alphaenc1[4*j + i] = 3;
-            alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[3]) {
-            alphaenc1[4*j + i] = 4;
-            alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[4]) {
-            alphaenc1[4*j + i] = 5;
-            alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[5]) {
-            alphaenc1[4*j + i] = 6;
-            alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[6]) {
-            alphaenc1[4*j + i] = 7;
-            alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
-         }
-         else {
-            alphaenc1[4*j + i] = 1;
-            alphadist = srccolors[j][i] - alphause[0];
-         }
-         alphablockerror1 += alphadist * alphadist;
-      }
-   }
-
-#if RGTC_DEBUG
-   for (i = 0; i < 16; i++) {
-      fprintf(stderr, "%d ", alphaenc1[i]);
-   }
-   fprintf(stderr, "cutVals ");
-   for (i = 0; i < 8; i++) {
-      fprintf(stderr, "%d ", acutValues[i]);
-   }
-   fprintf(stderr, "srcVals ");
-   for (j = 0; j < numypixels; j++) {
-      for (i = 0; i < numxpixels; i++) {
-	 fprintf(stderr, "%d ", srccolors[j][i]);
-      }
-   }
-   fprintf(stderr, "\n");
-#endif
-
-   /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
-      are false but try it anyway */
-   if (alphablockerror1 >= 32) {
-
-      /* don't bother if encoding is already very good, this condition should also imply
-      we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
-      alphablockerror2 = 0;
-      for (aindex = 0; aindex < 5; aindex++) {
-         /* don't forget here is always rounded down */
-         acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
-      }
-      for (j = 0; j < numypixels; j++) {
-         for (i = 0; i < numxpixels; i++) {
-             /* maybe it's overkill to have the most complicated calculation just for the error
-               calculation which we only need to figure out if encoding1 or encoding2 is better... */
-            if (srccolors[j][i] == 0) {
-               alphaenc2[4*j + i] = 6;
-               alphadist = 0;
-            }
-            else if (srccolors[j][i] == 255) {
-               alphaenc2[4*j + i] = 7;
-               alphadist = 0;
-            }
-            else if (srccolors[j][i] <= acutValues[0]) {
-               alphaenc2[4*j + i] = 0;
-               alphadist = srccolors[j][i] - alphabase[0];
-            }
-            else if (srccolors[j][i] <= acutValues[1]) {
-               alphaenc2[4*j + i] = 2;
-               alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
-            }
-            else if (srccolors[j][i] <= acutValues[2]) {
-               alphaenc2[4*j + i] = 3;
-               alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
-            }
-            else if (srccolors[j][i] <= acutValues[3]) {
-               alphaenc2[4*j + i] = 4;
-               alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
-            }
-            else if (srccolors[j][i] <= acutValues[4]) {
-               alphaenc2[4*j + i] = 5;
-               alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
-            }
-            else {
-               alphaenc2[4*j + i] = 1;
-               alphadist = srccolors[j][i] - alphabase[1];
-            }
-            alphablockerror2 += alphadist * alphadist;
-         }
-      }
-
-
-      /* skip this if the error is already very small
-         this encoding is MUCH better on average than #2 though, but expensive! */
-      if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
-         GLshort blockerrlin1 = 0;
-         GLshort blockerrlin2 = 0;
-         GLubyte nralphainrangelow = 0;
-         GLubyte nralphainrangehigh = 0;
-         alphatest[0] = 0xff;
-         alphatest[1] = 0x0;
-         /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
-         for (j = 0; j < numypixels; j++) {
-            for (i = 0; i < numxpixels; i++) {
-               if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (255 -(alphabase[1] - alphabase[0]) / 28)))
-                  alphatest[1] = srccolors[j][i];
-               if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28))
-                  alphatest[0] = srccolors[j][i];
-            }
-         }
-          /* shouldn't happen too often, don't really care about those degenerated cases */
-          if (alphatest[1] <= alphatest[0]) {
-             alphatest[0] = 1;
-             alphatest[1] = 254;
-         }
-         for (aindex = 0; aindex < 5; aindex++) {
-         /* don't forget here is always rounded down */
-            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
-         }
-
-         /* find the "average" difference between the alpha values and the next encoded value.
-            This is then used to calculate new base values.
-            Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
-            since they will see more improvement, and also because the values in the middle are somewhat
-            likely to get no improvement at all (because the base values might move in different directions)?
-            OTOH it would mean the values in the middle are even less likely to get an improvement
-         */
-         for (j = 0; j < numypixels; j++) {
-            for (i = 0; i < numxpixels; i++) {
-               if (srccolors[j][i] <= alphatest[0] / 2) {
-               }
-               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
-               }
-               else if (srccolors[j][i] <= acutValues[0]) {
-                  blockerrlin1 += (srccolors[j][i] - alphatest[0]);
-                  nralphainrangelow += 1;
-               }
-               else if (srccolors[j][i] <= acutValues[1]) {
-                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
-                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
-                  nralphainrangelow += 1;
-                  nralphainrangehigh += 1;
-               }
-               else if (srccolors[j][i] <= acutValues[2]) {
-                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
-                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
-                  nralphainrangelow += 1;
-                  nralphainrangehigh += 1;
-               }
-               else if (srccolors[j][i] <= acutValues[3]) {
-                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
-                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
-                  nralphainrangelow += 1;
-                  nralphainrangehigh += 1;
-               }
-               else if (srccolors[j][i] <= acutValues[4]) {
-                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
-                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
-                  nralphainrangelow += 1;
-                  nralphainrangehigh += 1;
-                  }
-               else {
-                  blockerrlin2 += (srccolors[j][i] - alphatest[1]);
-                  nralphainrangehigh += 1;
-               }
-            }
-         }
-         /* shouldn't happen often, needed to avoid div by zero */
-         if (nralphainrangelow == 0) nralphainrangelow = 1;
-         if (nralphainrangehigh == 0) nralphainrangehigh = 1;
-         alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
-#if RGTC_DEBUG
-         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
-         fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
-#endif
-         /* again shouldn't really happen often... */
-         if (alphatest[0] < 0) {
-            alphatest[0] = 0;
-         }
-         alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
-         if (alphatest[1] > 255) {
-            alphatest[1] = 255;
-         }
-
-         alphablockerror3 = 0;
-         for (aindex = 0; aindex < 5; aindex++) {
-         /* don't forget here is always rounded down */
-            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
-         }
-         for (j = 0; j < numypixels; j++) {
-            for (i = 0; i < numxpixels; i++) {
-                /* maybe it's overkill to have the most complicated calculation just for the error
-                  calculation which we only need to figure out if encoding1 or encoding2 is better... */
-               if (srccolors[j][i] <= alphatest[0] / 2) {
-                  alphaenc3[4*j + i] = 6;
-                  alphadist = srccolors[j][i];
-               }
-               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
-                  alphaenc3[4*j + i] = 7;
-                  alphadist = 255 - srccolors[j][i];
-               }
-               else if (srccolors[j][i] <= acutValues[0]) {
-                  alphaenc3[4*j + i] = 0;
-                  alphadist = srccolors[j][i] - alphatest[0];
-               }
-               else if (srccolors[j][i] <= acutValues[1]) {
-                 alphaenc3[4*j + i] = 2;
-                 alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
-               }
-               else if (srccolors[j][i] <= acutValues[2]) {
-                  alphaenc3[4*j + i] = 3;
-                  alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
-               }
-               else if (srccolors[j][i] <= acutValues[3]) {
-                  alphaenc3[4*j + i] = 4;
-                  alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
-               }
-               else if (srccolors[j][i] <= acutValues[4]) {
-                  alphaenc3[4*j + i] = 5;
-                  alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
-               }
-               else {
-                  alphaenc3[4*j + i] = 1;
-                  alphadist = srccolors[j][i] - alphatest[1];
-               }
-               alphablockerror3 += alphadist * alphadist;
-            }
-         }
-      }
-   }
-  /* write the alpha values and encoding back. */
-   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
-#if RGTC_DEBUG
-      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
-#endif
-      write_rgtc_encoded_channel( blkaddr, alphause[1], alphause[0], alphaenc1 );
-   }
-   else if (alphablockerror2 <= alphablockerror3) {
-#if RGTC_DEBUG
-      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
-#endif
-      write_rgtc_encoded_channel( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
-   }
-   else {
-#if RGTC_DEBUG
-      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
-#endif
-      write_rgtc_encoded_channel( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
-   }
-}
-
-
-static void write_rgtc_encoded_channel_s(GLbyte *blkaddr,
-					 GLbyte alphabase1,
-					 GLbyte alphabase2,
-					 GLbyte alphaenc[16])
-{
-   *blkaddr++ = alphabase1;
-   *blkaddr++ = alphabase2;
-   *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
-   *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
-   *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
-   *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
-   *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
-   *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
-}
-
-static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4],
-			       GLint numxpixels, GLint numypixels)
-{
-   GLbyte alphabase[2], alphause[2];
-   GLshort alphatest[2] = { 0 };
-   GLuint alphablockerror1, alphablockerror2, alphablockerror3;
-   GLbyte i, j, aindex, acutValues[7];
-   GLbyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
-   GLboolean alphaabsmin = GL_FALSE;
-   GLboolean alphaabsmax = GL_FALSE;
-   GLshort alphadist;
-
-   /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
-   alphabase[0] = 0xff; alphabase[1] = 0x0;
-   for (j = 0; j < numypixels; j++) {
-      for (i = 0; i < numxpixels; i++) {
-         if (srccolors[j][i] == 0)
-            alphaabsmin = GL_TRUE;
-         else if (srccolors[j][i] == 255)
-            alphaabsmax = GL_TRUE;
-         else {
-            if (srccolors[j][i] > alphabase[1])
-               alphabase[1] = srccolors[j][i];
-            if (srccolors[j][i] < alphabase[0])
-               alphabase[0] = srccolors[j][i];
-         }
-      }
-   }
-
-
-   if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
-      /* shortcut here since it is a very common case (and also avoids later problems) */
-      /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
-      /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
-
-      *blkaddr++ = srccolors[0][0];
-      blkaddr++;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-      *blkaddr++ = 0;
-#if RGTC_DEBUG
-      fprintf(stderr, "enc0 used\n");
-#endif
-      return;
-   }
-
-   /* find best encoding for alpha0 > alpha1 */
-   /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
-   alphablockerror1 = 0x0;
-   alphablockerror2 = 0xffffffff;
-   alphablockerror3 = 0xffffffff;
-   if (alphaabsmin) alphause[0] = 0;
-   else alphause[0] = alphabase[0];
-   if (alphaabsmax) alphause[1] = 255;
-   else alphause[1] = alphabase[1];
-   /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
-   for (aindex = 0; aindex < 7; aindex++) {
-      /* don't forget here is always rounded down */
-      acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
-   }
+#define TAG(x) unsigned_##x
 
-   for (j = 0; j < numypixels; j++) {
-      for (i = 0; i < numxpixels; i++) {
-         /* maybe it's overkill to have the most complicated calculation just for the error
-            calculation which we only need to figure out if encoding1 or encoding2 is better... */
-         if (srccolors[j][i] > acutValues[0]) {
-            alphaenc1[4*j + i] = 0;
-            alphadist = srccolors[j][i] - alphause[1];
-         }
-         else if (srccolors[j][i] > acutValues[1]) {
-            alphaenc1[4*j + i] = 2;
-            alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[2]) {
-            alphaenc1[4*j + i] = 3;
-            alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[3]) {
-            alphaenc1[4*j + i] = 4;
-            alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[4]) {
-            alphaenc1[4*j + i] = 5;
-            alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[5]) {
-            alphaenc1[4*j + i] = 6;
-            alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
-         }
-         else if (srccolors[j][i] > acutValues[6]) {
-            alphaenc1[4*j + i] = 7;
-            alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
-         }
-         else {
-            alphaenc1[4*j + i] = 1;
-            alphadist = srccolors[j][i] - alphause[0];
-         }
-         alphablockerror1 += alphadist * alphadist;
-      }
-   }
-#if RGTC_DEBUG
-   for (i = 0; i < 16; i++) {
-      fprintf(stderr, "%d ", alphaenc1[i]);
-   }
-   fprintf(stderr, "cutVals ");
-   for (i = 0; i < 8; i++) {
-      fprintf(stderr, "%d ", acutValues[i]);
-   }
-   fprintf(stderr, "srcVals ");
-   for (j = 0; j < numypixels; j++)
-      for (i = 0; i < numxpixels; i++) {
-	 fprintf(stderr, "%d ", srccolors[j][i]);
-      }
-   
-   fprintf(stderr, "\n");
-#endif
+#define TYPE GLubyte
+#define T_MIN 0
+#define T_MAX 0xff
 
-   /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
-      are false but try it anyway */
-   if (alphablockerror1 >= 32) {
-
-      /* don't bother if encoding is already very good, this condition should also imply
-      we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
-      alphablockerror2 = 0;
-      for (aindex = 0; aindex < 5; aindex++) {
-         /* don't forget here is always rounded down */
-         acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
-      }
-      for (j = 0; j < numypixels; j++) {
-         for (i = 0; i < numxpixels; i++) {
-             /* maybe it's overkill to have the most complicated calculation just for the error
-               calculation which we only need to figure out if encoding1 or encoding2 is better... */
-            if (srccolors[j][i] == 0) {
-               alphaenc2[4*j + i] = 6;
-               alphadist = 0;
-            }
-            else if (srccolors[j][i] == 255) {
-               alphaenc2[4*j + i] = 7;
-               alphadist = 0;
-            }
-            else if (srccolors[j][i] <= acutValues[0]) {
-               alphaenc2[4*j + i] = 0;
-               alphadist = srccolors[j][i] - alphabase[0];
-            }
-            else if (srccolors[j][i] <= acutValues[1]) {
-               alphaenc2[4*j + i] = 2;
-               alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
-            }
-            else if (srccolors[j][i] <= acutValues[2]) {
-               alphaenc2[4*j + i] = 3;
-               alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
-            }
-            else if (srccolors[j][i] <= acutValues[3]) {
-               alphaenc2[4*j + i] = 4;
-               alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
-            }
-            else if (srccolors[j][i] <= acutValues[4]) {
-               alphaenc2[4*j + i] = 5;
-               alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
-            }
-            else {
-               alphaenc2[4*j + i] = 1;
-               alphadist = srccolors[j][i] - alphabase[1];
-            }
-            alphablockerror2 += alphadist * alphadist;
-         }
-      }
+#include "texcompress_rgtc_tmp.h"
 
+#undef TAG
+#undef TYPE
+#undef T_MIN
+#undef T_MAX
 
-      /* skip this if the error is already very small
-         this encoding is MUCH better on average than #2 though, but expensive! */
-      if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
-         GLshort blockerrlin1 = 0;
-         GLshort blockerrlin2 = 0;
-         GLubyte nralphainrangelow = 0;
-         GLubyte nralphainrangehigh = 0;
-         alphatest[0] = 0xff;
-         alphatest[1] = 0x0;
-         /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
-         for (j = 0; j < numypixels; j++) {
-            for (i = 0; i < numxpixels; i++) {
-               if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (255 -(alphabase[1] - alphabase[0]) / 28)))
-                  alphatest[1] = srccolors[j][i];
-               if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28))
-                  alphatest[0] = srccolors[j][i];
-            }
-         }
-          /* shouldn't happen too often, don't really care about those degenerated cases */
-          if (alphatest[1] <= alphatest[0]) {
-             alphatest[0] = 1;
-             alphatest[1] = 254;
-         }
-         for (aindex = 0; aindex < 5; aindex++) {
-         /* don't forget here is always rounded down */
-            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
-         }
+#define TAG(x) signed_##x
+#define TYPE GLbyte
+#define T_MIN (GLbyte)-127
+#define T_MAX (GLbyte)127
 
-         /* find the "average" difference between the alpha values and the next encoded value.
-            This is then used to calculate new base values.
-            Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
-            since they will see more improvement, and also because the values in the middle are somewhat
-            likely to get no improvement at all (because the base values might move in different directions)?
-            OTOH it would mean the values in the middle are even less likely to get an improvement
-         */
-         for (j = 0; j < numypixels; j++) {
-            for (i = 0; i < numxpixels; i++) {
-               if (srccolors[j][i] <= alphatest[0] / 2) {
-               }
-               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
-               }
-               else if (srccolors[j][i] <= acutValues[0]) {
-                  blockerrlin1 += (srccolors[j][i] - alphatest[0]);
-                  nralphainrangelow += 1;
-               }
-               else if (srccolors[j][i] <= acutValues[1]) {
-                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
-                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
-                  nralphainrangelow += 1;
-                  nralphainrangehigh += 1;
-               }
-               else if (srccolors[j][i] <= acutValues[2]) {
-                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
-                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
-                  nralphainrangelow += 1;
-                  nralphainrangehigh += 1;
-               }
-               else if (srccolors[j][i] <= acutValues[3]) {
-                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
-                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
-                  nralphainrangelow += 1;
-                  nralphainrangehigh += 1;
-               }
-               else if (srccolors[j][i] <= acutValues[4]) {
-                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
-                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
-                  nralphainrangelow += 1;
-                  nralphainrangehigh += 1;
-                  }
-               else {
-                  blockerrlin2 += (srccolors[j][i] - alphatest[1]);
-                  nralphainrangehigh += 1;
-               }
-            }
-         }
-         /* shouldn't happen often, needed to avoid div by zero */
-         if (nralphainrangelow == 0) nralphainrangelow = 1;
-         if (nralphainrangehigh == 0) nralphainrangehigh = 1;
-         alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
-#if RGTC_DEBUG
-	 fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
-         fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
-#endif
-         /* again shouldn't really happen often... */
-         if (alphatest[0] < 0) {
-            alphatest[0] = 0;
-         }
-         alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
-         if (alphatest[1] > 255) {
-            alphatest[1] = 255;
-         }
+#include "texcompress_rgtc_tmp.h"
 
-         alphablockerror3 = 0;
-         for (aindex = 0; aindex < 5; aindex++) {
-         /* don't forget here is always rounded down */
-            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
-         }
-         for (j = 0; j < numypixels; j++) {
-            for (i = 0; i < numxpixels; i++) {
-                /* maybe it's overkill to have the most complicated calculation just for the error
-                  calculation which we only need to figure out if encoding1 or encoding2 is better... */
-               if (srccolors[j][i] <= alphatest[0] / 2) {
-                  alphaenc3[4*j + i] = 6;
-                  alphadist = srccolors[j][i];
-               }
-               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
-                  alphaenc3[4*j + i] = 7;
-                  alphadist = 255 - srccolors[j][i];
-               }
-               else if (srccolors[j][i] <= acutValues[0]) {
-                  alphaenc3[4*j + i] = 0;
-                  alphadist = srccolors[j][i] - alphatest[0];
-               }
-               else if (srccolors[j][i] <= acutValues[1]) {
-                 alphaenc3[4*j + i] = 2;
-                 alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
-               }
-               else if (srccolors[j][i] <= acutValues[2]) {
-                  alphaenc3[4*j + i] = 3;
-                  alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
-               }
-               else if (srccolors[j][i] <= acutValues[3]) {
-                  alphaenc3[4*j + i] = 4;
-                  alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
-               }
-               else if (srccolors[j][i] <= acutValues[4]) {
-                  alphaenc3[4*j + i] = 5;
-                  alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
-               }
-               else {
-                  alphaenc3[4*j + i] = 1;
-                  alphadist = srccolors[j][i] - alphatest[1];
-               }
-               alphablockerror3 += alphadist * alphadist;
-            }
-         }
-      }
-   }
-  /* write the alpha values and encoding back. */
-   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
-#if RGTC_DEBUG
-      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
-#endif
-      write_rgtc_encoded_channel_s( blkaddr, alphause[1], alphause[0], alphaenc1 );
-   }
-   else if (alphablockerror2 <= alphablockerror3) {
-#if RGTC_DEBUG
-      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
-#endif
-      write_rgtc_encoded_channel_s( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
-   }
-   else {
-#if RGTC_DEBUG
-      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
-#endif
-      write_rgtc_encoded_channel_s( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
-   }
-}
+#undef TAG
+#undef TYPE
+#undef T_MIN
+#undef T_MAX
diff --git a/src/mesa/main/texcompress_rgtc_tmp.h b/src/mesa/main/texcompress_rgtc_tmp.h
new file mode 100644
index 0000000000..0f830a5d90
--- /dev/null
+++ b/src/mesa/main/texcompress_rgtc_tmp.h
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2011 Red Hat Inc.
+ * 
+ * block compression parts are:
+ * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:
+ *    Dave Airlie
+ */
+
+/* included by texcompress_rgtc to define byte/ubyte compressors */
+
+/* Store both endpoint values at blkaddr, then pack the sixteen codes, 3 bits each, into 6 bytes. */
+static void TAG(write_rgtc_encoded_channel)(TYPE *blkaddr, TYPE alphabase1,
+                                            TYPE alphabase2, TYPE alphaenc[16])
+{
+   const TYPE *enc = alphaenc;
+   TYPE *out = blkaddr + 2;
+   blkaddr[0] = alphabase1;
+   blkaddr[1] = alphabase2;
+   for (; enc < alphaenc + 16; enc += 8, out += 3) { /* every 8 codes fill 3 output bytes */
+      out[0] = enc[0] | (enc[1] << 3) | ((enc[2] & 3) << 6);
+      out[1] = (enc[2] >> 2) | (enc[3] << 1) | (enc[4] << 4) | ((enc[5] & 1) << 7);
+      out[2] = (enc[5] >> 1) | (enc[6] << 2) | (enc[7] << 5);
+   }
+}
+/*
+ * Encode one block of up to 4x4 single-channel texels into 8 bytes at blkaddr.
+ * Only the first numypixels rows and numxpixels columns of srccolors are read.
+ * Up to three candidate encodings are evaluated; the one with the lowest
+ * summed squared error is written out.
+ */
+static void TAG(encode_rgtc_chan)(TYPE *blkaddr, TYPE srccolors[4][4],
+                                  GLint numxpixels, GLint numypixels)
+{
+   TYPE alphabase[2], alphause[2];
+   GLshort alphatest[2] = { 0 };
+   GLuint alphablockerror1, alphablockerror2, alphablockerror3;
+   TYPE i, j, aindex, acutValues[7];
+   TYPE alphaenc1[16] = { 0 }, alphaenc2[16] = { 0 }, alphaenc3[16] = { 0 }; /* zeroed: codes of texels outside a partial block are never assigned but are still packed */
+   GLboolean alphaabsmin = GL_FALSE;
+   GLboolean alphaabsmax = GL_FALSE;
+   GLshort alphadist;
+
+   /* find lowest and highest value in block that is neither T_MIN nor T_MAX: alphabase[0] lowest, alphabase[1] highest */
+   alphabase[0] = T_MAX; alphabase[1] = T_MIN;
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         if (srccolors[j][i] == T_MIN)
+            alphaabsmin = GL_TRUE;
+         else if (srccolors[j][i] == T_MAX)
+            alphaabsmax = GL_TRUE;
+         else {
+            if (srccolors[j][i] > alphabase[1])
+               alphabase[1] = srccolors[j][i];
+            if (srccolors[j][i] < alphabase[0])
+               alphabase[0] = srccolors[j][i];
+         }
+      }
+   }
+
+   if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
+      /* shortcut here since it is a very common case (and also avoids later problems) */
+      /* could also test for alphabase[0] == alphabase[1] (and not min/max), but probably not common, so don't bother */
+
+      *blkaddr++ = srccolors[0][0];
+      *blkaddr++ = srccolors[0][0]; /* all codes below are 0; store a defined value instead of skipping this byte */
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+#if RGTC_DEBUG
+      fprintf(stderr, "enc0 used\n");
+#endif
+      return;
+   }
+
+   /* find best encoding for alpha0 > alpha1 */
+   /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
+   alphablockerror1 = 0x0;
+   alphablockerror2 = 0xffffffff;
+   alphablockerror3 = 0xffffffff;
+   if (alphaabsmin) alphause[0] = T_MIN;
+   else alphause[0] = alphabase[0];
+   if (alphaabsmax) alphause[1] = T_MAX;
+   else alphause[1] = alphabase[1];
+   /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
+   for (aindex = 0; aindex < 7; aindex++) {
+      /* don't forget here is always rounded down */
+      acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
+   }
+
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         /* the per-texel distance is only needed to compare this encoding's total error with the other encodings */
+         if (srccolors[j][i] > acutValues[0]) {
+            alphaenc1[4*j + i] = 0;
+            alphadist = srccolors[j][i] - alphause[1];
+         }
+         else if (srccolors[j][i] > acutValues[1]) {
+            alphaenc1[4*j + i] = 2;
+            alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[2]) {
+            alphaenc1[4*j + i] = 3;
+            alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[3]) {
+            alphaenc1[4*j + i] = 4;
+            alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[4]) {
+            alphaenc1[4*j + i] = 5;
+            alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[5]) {
+            alphaenc1[4*j + i] = 6;
+            alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[6]) {
+            alphaenc1[4*j + i] = 7;
+            alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
+         }
+         else {
+            alphaenc1[4*j + i] = 1;
+            alphadist = srccolors[j][i] - alphause[0];
+         }
+         alphablockerror1 += alphadist * alphadist;
+      }
+   }
+
+#if RGTC_DEBUG
+   for (i = 0; i < 16; i++) {
+      fprintf(stderr, "%d ", alphaenc1[i]);
+   }
+   fprintf(stderr, "cutVals ");
+   for (i = 0; i < 7; i++) { /* acutValues has exactly 7 entries */
+      fprintf(stderr, "%d ", acutValues[i]);
+   }
+   fprintf(stderr, "srcVals ");
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         fprintf(stderr, "%d ", srccolors[j][i]);
+      }
+   }
+   fprintf(stderr, "\n");
+#endif
+
+   /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
+      are false but try it anyway */
+   if (alphablockerror1 >= 32) {
+
+      /* don't bother if encoding is already very good, this condition should also imply
+      we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
+      alphablockerror2 = 0;
+      for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+         acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
+      }
+      for (j = 0; j < numypixels; j++) {
+         for (i = 0; i < numxpixels; i++) {
+            /* exact T_MIN / T_MAX texels get the dedicated codes 6 / 7 with zero error; the rest use the 6 interpolated steps */
+            if (srccolors[j][i] == T_MIN) {
+               alphaenc2[4*j + i] = 6;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] == T_MAX) {
+               alphaenc2[4*j + i] = 7;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] <= acutValues[0]) {
+               alphaenc2[4*j + i] = 0;
+               alphadist = srccolors[j][i] - alphabase[0];
+            }
+            else if (srccolors[j][i] <= acutValues[1]) {
+               alphaenc2[4*j + i] = 2;
+               alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[2]) {
+               alphaenc2[4*j + i] = 3;
+               alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[3]) {
+               alphaenc2[4*j + i] = 4;
+               alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[4]) {
+               alphaenc2[4*j + i] = 5;
+               alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
+            }
+            else {
+               alphaenc2[4*j + i] = 1;
+               alphadist = srccolors[j][i] - alphabase[1];
+            }
+            alphablockerror2 += alphadist * alphadist;
+         }
+      }
+
+      /* skip this if the error is already very small
+         this encoding is MUCH better on average than #2 though, but expensive! */
+      if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
+         GLshort blockerrlin1 = 0;
+         GLshort blockerrlin2 = 0;
+         TYPE nralphainrangelow = 0;
+         TYPE nralphainrangehigh = 0;
+         alphatest[0] = T_MAX; /* start from the type's own limits so the signed variant searches the right range */
+         alphatest[1] = T_MIN;
+         /* if we have large range it's likely there are values close to T_MIN/T_MAX, try to map them to T_MIN/T_MAX */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (T_MAX -(alphabase[1] - alphabase[0]) / 28)))
+                  alphatest[1] = srccolors[j][i];
+               if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (T_MIN + (alphabase[1] - alphabase[0]) / 28)))
+                  alphatest[0] = srccolors[j][i];
+            }
+         }
+          /* shouldn't happen too often, don't really care about those degenerated cases */
+          if (alphatest[1] <= alphatest[0]) {
+             alphatest[0] = T_MIN + 1;
+             alphatest[1] = T_MAX - 1;
+         }
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+
+         /* find the "average" difference between the alpha values and the next encoded value.
+            This is then used to calculate new base values.
+            Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
+            since they will see more improvement, and also because the values in the middle are somewhat
+            likely to get no improvement at all (because the base values might move in different directions)?
+            OTOH it would mean the values in the middle are even less likely to get an improvement
+         */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if (srccolors[j][i] <= (T_MIN + alphatest[0]) / 2) {
+               }
+               else if (srccolors[j][i] > ((T_MAX + alphatest[1]) / 2)) {
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  blockerrlin1 += (srccolors[j][i] - alphatest[0]);
+                  nralphainrangelow += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+                  }
+               else {
+                  blockerrlin2 += (srccolors[j][i] - alphatest[1]);
+                  nralphainrangehigh += 1;
+               }
+            }
+         }
+         /* shouldn't happen often, needed to avoid div by zero */
+         if (nralphainrangelow == 0) nralphainrangelow = 1;
+         if (nralphainrangehigh == 0) nralphainrangehigh = 1;
+         alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
+#if RGTC_DEBUG
+         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
+         fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
+#endif
+         /* again shouldn't really happen often... */
+         if (alphatest[0] < T_MIN) {
+            alphatest[0] = T_MIN;
+         }
+         alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
+         if (alphatest[1] > T_MAX) {
+            alphatest[1] = T_MAX; /* clamp to the upper bound (was T_MIN, which collapsed the range) */
+         }
+
+         alphablockerror3 = 0;
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               /* texels nearer to T_MIN / T_MAX than to the adjusted endpoints snap to codes 6 / 7; error is the distance to that limit */
+               if (srccolors[j][i] <= (T_MIN + alphatest[0]) / 2) {
+                  alphaenc3[4*j + i] = 6;
+                  alphadist = srccolors[j][i] - T_MIN;
+               }
+               else if (srccolors[j][i] > ((T_MAX + alphatest[1]) / 2)) {
+                  alphaenc3[4*j + i] = 7;
+                  alphadist = T_MAX - srccolors[j][i];
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  alphaenc3[4*j + i] = 0;
+                  alphadist = srccolors[j][i] - alphatest[0];
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                 alphaenc3[4*j + i] = 2;
+                 alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  alphaenc3[4*j + i] = 3;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  alphaenc3[4*j + i] = 4;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  alphaenc3[4*j + i] = 5;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
+               }
+               else {
+                  alphaenc3[4*j + i] = 1;
+                  alphadist = srccolors[j][i] - alphatest[1];
+               }
+               alphablockerror3 += alphadist * alphadist;
+            }
+         }
+      }
+   }
+  /* write the endpoint values and codes of the encoding with the smallest error (ties prefer the earlier encoding) */
+   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
+#if RGTC_DEBUG
+      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %u\n", alphablockerror1);
+#endif
+      TAG(write_rgtc_encoded_channel)( blkaddr, alphause[1], alphause[0], alphaenc1 );
+   }
+   else if (alphablockerror2 <= alphablockerror3) {
+#if RGTC_DEBUG
+      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %u\n", alphablockerror2);
+#endif
+      TAG(write_rgtc_encoded_channel)( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
+   }
+   else {
+#if RGTC_DEBUG
+      fprintf(stderr, "enc3 used, error %u\n", alphablockerror3);
+#endif
+      TAG(write_rgtc_encoded_channel)( blkaddr, (TYPE)alphatest[0], (TYPE)alphatest[1], alphaenc3 );
+   }
+}
-- 
cgit v1.2.3