6 files changed, 221 insertions, 40 deletions
diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c
index f1798de134..e269705073 100644
--- a/src/mesa/drivers/dri/savage/savage_xmesa.c
+++ b/src/mesa/drivers/dri/savage/savage_xmesa.c
@@ -64,6 +64,7 @@ DRI_CONF_BEGIN
     DRI_CONF_SECTION_QUALITY
         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
         DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
+        DRI_CONF_FLOAT_DEPTH(false)
     DRI_CONF_SECTION_END
     DRI_CONF_SECTION_PERFORMANCE
         DRI_CONF_MAX_TEXTURE_UNITS(2,1,2)
@@ -72,7 +73,7 @@ DRI_CONF_BEGIN
         DRI_CONF_NO_RAST(false)
     DRI_CONF_SECTION_END
 DRI_CONF_END;
-static const GLuint __driNConfigOptions = 4;
+static const GLuint __driNConfigOptions = 5;
 
 #ifdef USE_NEW_INTERFACE
 static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
@@ -308,6 +309,9 @@ savageCreateContext( const __GLcontextModes *mesaVis,
    driParseConfigFiles (&imesa->optionCache, &savageScreen->optionCache,
                         sPriv->myNum, "savage");
 
+   imesa->float_depth = driQueryOptionb(&imesa->optionCache, "float_depth") &&
+       savageScreen->chipset >= S3_SAVAGE4;
+   imesa->no_rast = driQueryOptionb(&imesa->optionCache, "no_rast");
    imesa->texture_depth = driQueryOptioni (&imesa->optionCache,
 					   "texture_depth");
    if (imesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
@@ -407,7 +411,7 @@ savageCreateContext( const __GLcontextModes *mesaVis,
 
    imesa->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
    imesa->depth_scale = (imesa->savageScreen->zpp == 2) ?
-       (1.0F/0x10000):(1.0F/0x1000000);
+       (1.0F/0xffff):(1.0F/0xffffff);
 
    imesa->bufferSize = savageScreen->bufferSize;
    imesa->dmaVtxBuf.total = 0;
@@ -479,8 +483,6 @@ savageCreateContext( const __GLcontextModes *mesaVis,
 
    savageDDInitState( imesa );
 
-   imesa->no_rast = driQueryOptionb(&imesa->optionCache, "no_rast");
-
    driContextPriv->driverPrivate = (void *) imesa;
 
    return GL_TRUE;
diff --git a/src/mesa/drivers/dri/savage/savagecontext.h b/src/mesa/drivers/dri/savage/savagecontext.h
index 8978f9247c..8d77d1851d 100644
--- a/src/mesa/drivers/dri/savage/savagecontext.h
+++ b/src/mesa/drivers/dri/savage/savagecontext.h
@@ -294,6 +294,7 @@ struct savage_context_t {
     driOptionCache optionCache;
     GLint texture_depth;
     GLboolean no_rast;
+    GLboolean float_depth;
 };
 
 #define SAVAGE_CONTEXT(ctx) ((savageContextPtr)(ctx->DriverCtx))
diff --git a/src/mesa/drivers/dri/savage/savageioctl.c b/src/mesa/drivers/dri/savage/savageioctl.c
index dd7a84e2e7..62e7142d24 100644
--- a/src/mesa/drivers/dri/savage/savageioctl.c
+++ b/src/mesa/drivers/dri/savage/savageioctl.c
@@ -38,6 +38,7 @@
 #include "savageioctl.h"
 #include "savage_bci.h"
 #include "savagestate.h"
+#include "savagespan.h"
 
 #include "drm.h"
 #include <sys/ioctl.h>
@@ -333,10 +334,17 @@ static void savageDDClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
        fprintf (stderr, "%s\n", __FUNCTION__);
 
    clearColor = imesa->ClearColor;
-   if(imesa->savageScreen->zpp == 2)
-       clearDepth = (GLuint) (ctx->Depth.Clear * DEPTH_SCALE_16);
-   else
-       clearDepth = (GLuint) (ctx->Depth.Clear * DEPTH_SCALE_24);
+   if (imesa->float_depth) {
+       if (imesa->savageScreen->zpp == 2)
+	   clearDepth = savageEncodeFloat16(1.0 - ctx->Depth.Clear);
+       else
+	   clearDepth = savageEncodeFloat24(1.0 - ctx->Depth.Clear);
+   } else {
+       if (imesa->savageScreen->zpp == 2)
+	   clearDepth = (GLuint) ((1.0 - ctx->Depth.Clear) * DEPTH_SCALE_16);
+       else
+	   clearDepth = (GLuint) ((1.0 - ctx->Depth.Clear) * DEPTH_SCALE_24);
+   }
 
    colorMask = *((GLuint *) &ctx->Color.ColorMask);
    depthMask = 0;
diff --git a/src/mesa/drivers/dri/savage/savagespan.c b/src/mesa/drivers/dri/savage/savagespan.c
index 0caee3a652..6cda13cc29 100644
--- a/src/mesa/drivers/dri/savage/savagespan.c
+++ b/src/mesa/drivers/dri/savage/savagespan.c
@@ -158,13 +158,14 @@ do {								\
 
 
 
-/* 16 bit depthbuffer functions.
+/* 16 bit integer depthbuffer functions
+ * Depth range is reversed. See also savageCalcViewport.
  */
 #define WRITE_DEPTH( _x, _y, d ) \
-    *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = d
+    *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = 0xFFFF - (d)
 
 #define READ_DEPTH( d, _x, _y ) \
-    d = *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch)
+    d = 0xFFFF - *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch)
 
 #define TAG(x) savage##x##_16
 #include "depthtmp.h"
@@ -173,22 +174,62 @@ do {								\
 
 
 
-/* 8-bit stencil /24-bit depth depthbuffer functions.
+/* 16 bit float depthbuffer functions
+ * Values are stored reversed (1 - d/65535) in the Savage float format. */
+#define WRITE_DEPTH( _x, _y, d ) \
+    *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = \
+        savageEncodeFloat16( 1.0 - (GLfloat)(d)/65535.0 )
+
+#define READ_DEPTH( d, _x, _y ) \
+    d = 65535 - \
+        savageDecodeFloat16( *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) ) * \
+	65535.0
+
+#define TAG(x) savage##x##_16f
+#include "depthtmp.h"
+
+
+
+
+
+/* 8-bit stencil /24-bit integer depth depthbuffer functions.
+ * Depth range is reversed. See also savageCalcViewport.
  */
 #define WRITE_DEPTH( _x, _y, d ) do {				\
    GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch);	\
    tmp &= 0xFF000000;						\
-   tmp |= d;							\
+   tmp |= (0x00FFFFFF - (d)) & 0x00FFFFFF; /* keep stencil byte */	\
    *(GLuint *)(buf + (_x<<2) + _y*pitch)  = tmp;		\
 } while(0)
 
 #define READ_DEPTH( d, _x, _y )	\
-   d = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch)
+   d = 0x00FFFFFF - (*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) & 0x00FFFFFF)
 
 #define TAG(x) savage##x##_8_24
 #include "depthtmp.h"
 
 
+
+
+
+/* 24 bit float depthbuffer functions
+ * Depth is stored reversed (1 - d/16777215); the stencil byte is preserved. */
+#define WRITE_DEPTH( _x, _y, d ) do {				\
+    GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch);	\
+    tmp &= 0xFF000000;						\
+    tmp |= savageEncodeFloat24( 1.0 - (GLfloat)(d)/16777215.0 );	\
+    *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch)  = tmp;		\
+} while(0)
+
+#define READ_DEPTH( d, _x, _y )					\
+    d = 16777215 - savageDecodeFloat24(				\
+	*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) & 0x00FFFFFF)	\
+	* 16777215.0
+
+#define TAG(x) savage##x##_8_24f
+#include "depthtmp.h"
+
+
 #define WRITE_STENCIL( _x, _y, d ) do {				\
    GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch);	\
    tmp &= 0x00FFFFFF;						\
@@ -315,20 +356,36 @@ void savageDDInitSpanFuncs( GLcontext *ctx )
 
    switch (imesa->savageScreen->zpp)
    {
-   case 2: 
-       swdd->ReadDepthSpan = savageReadDepthSpan_16;
-       swdd->WriteDepthSpan = savageWriteDepthSpan_16;
-       swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_16;
-       swdd->ReadDepthPixels = savageReadDepthPixels_16;
-       swdd->WriteDepthPixels = savageWriteDepthPixels_16;
+   case 2:
+       if (imesa->float_depth) {
+	   swdd->ReadDepthSpan = savageReadDepthSpan_16f;
+	   swdd->WriteDepthSpan = savageWriteDepthSpan_16f;
+	   swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_16f;
+	   swdd->ReadDepthPixels = savageReadDepthPixels_16f;
+	   swdd->WriteDepthPixels = savageWriteDepthPixels_16f;
+       } else {
+	   swdd->ReadDepthSpan = savageReadDepthSpan_16;
+	   swdd->WriteDepthSpan = savageWriteDepthSpan_16;
+	   swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_16;
+	   swdd->ReadDepthPixels = savageReadDepthPixels_16;
+	   swdd->WriteDepthPixels = savageWriteDepthPixels_16;
+       }
        
        break;
    case 4: 
-       swdd->ReadDepthSpan = savageReadDepthSpan_8_24;
-       swdd->WriteDepthSpan = savageWriteDepthSpan_8_24;
-       swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_8_24;
-       swdd->ReadDepthPixels = savageReadDepthPixels_8_24;
-       swdd->WriteDepthPixels = savageWriteDepthPixels_8_24;    
+       if (imesa->float_depth) {
+	   swdd->ReadDepthSpan = savageReadDepthSpan_8_24f;
+	   swdd->WriteDepthSpan = savageWriteDepthSpan_8_24f;
+	   swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_8_24f;
+	   swdd->ReadDepthPixels = savageReadDepthPixels_8_24f;
+	   swdd->WriteDepthPixels = savageWriteDepthPixels_8_24f;    
+       } else {
+	   swdd->ReadDepthSpan = savageReadDepthSpan_8_24;
+	   swdd->WriteDepthSpan = savageWriteDepthSpan_8_24;
+	   swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_8_24;
+	   swdd->ReadDepthPixels = savageReadDepthPixels_8_24;
+	   swdd->WriteDepthPixels = savageWriteDepthPixels_8_24;    
+       }
        swdd->ReadStencilSpan = savageReadStencilSpan_8_24;
        swdd->WriteStencilSpan = savageWriteStencilSpan_8_24;
        swdd->ReadStencilPixels = savageReadStencilPixels_8_24;
diff --git a/src/mesa/drivers/dri/savage/savagespan.h b/src/mesa/drivers/dri/savage/savagespan.h
index 35247b4706..cb3a1b52fd 100644
--- a/src/mesa/drivers/dri/savage/savagespan.h
+++ b/src/mesa/drivers/dri/savage/savagespan.h
@@ -27,4 +27,102 @@
 
 extern void savageDDInitSpanFuncs( GLcontext *ctx );
 
+/*
+ * Savage 16-bit float depth format with zExpOffset=16:
+ *   4 bit unsigned exponent, 12 bit mantissa
+ *
+ * The meaning of the mantissa is different from IEEE floating point
+ * formats. The same number can't be encoded with different exponents.
+ * So no bits are wasted.
+ *
+ * exponent | range encoded by mantissa | accuracy of mantissa
+ * ---------+---------------------------+---------------------
+ *       15 | 2^-1 .. 1                 | 2^-13
+ *       14 | 2^-2 .. 2^-1              | 2^-14
+ *       13 | 2^-3 .. 2^-2              | 2^-15
+ *      ... | ...                       |
+ *        2 | 2^-14 .. 2^-13            | 2^-26
+ *        1 | 2^-15 .. 2^-14            | 2^-27
+ *        0 | 2^-16 .. 2^-15            | 2^-28
+ *
+ * Note that there is no encoding for numbers < 2^-16.
+ */
+static __inline GLuint savageEncodeFloat16( GLdouble x )
+{
+    /* Clamp to [0,1] (NaN -> 0) so the integer conversion can't overflow. */
+    const GLdouble c = x > 1.0 ? 1.0 : x > 0.0 ? x : 0.0;
+    GLint r = (GLint)(c * 0x10000000); /* fixed point, 28 fraction bits */
+    GLint exp = 0;
+    if (r < 0x1000) /* below 2^-16: no encoding exists, return 0 */
+	return 0;
+    for (; r - 0x1000 > 0x0fff; r >>= 1) /* shift down to a 12 bit mantissa */
+	exp++;
+    return exp > 0xf ? 0xffff : (r - 0x1000) | (exp << 12);
+}
+static __inline GLdouble savageDecodeFloat16( GLuint x )
+{
+    static const GLdouble pow2[16] = { /* value of one mantissa step, per exponent */
+	1.0/(1<<28), 1.0/(1<<27), 1.0/(1<<26), 1.0/(1<<25),
+	1.0/(1<<24), 1.0/(1<<23), 1.0/(1<<22), 1.0/(1<<21),
+	1.0/(1<<20), 1.0/(1<<19), 1.0/(1<<18), 1.0/(1<<17),
+	1.0/(1<<16), 1.0/(1<<15), 1.0/(1<<14), 1.0/(1<<13)
+    };
+    static const GLdouble bias[16] = { /* value at mantissa 0, per exponent */
+	1.0/(1<<16), 1.0/(1<<15), 1.0/(1<<14), 1.0/(1<<13),
+	1.0/(1<<12), 1.0/(1<<11), 1.0/(1<<10), 1.0/(1<< 9),
+	1.0/(1<< 8), 1.0/(1<< 7), 1.0/(1<< 6), 1.0/(1<< 5),
+	1.0/(1<< 4), 1.0/(1<< 3), 1.0/(1<< 2), 1.0/(1<< 1)
+    };
+    GLuint mant = x & 0x0fff; /* low 12 bits */
+    GLuint exp = (x >> 12) & 0xf; /* next 4 bits; higher bits of x are ignored */
+    return bias[exp] + pow2[exp]*mant;
+}
+
+/*
+ * Savage 24-bit float depth format with zExpOffset=32:
+ *   5 bit unsigned exponent, 19 bit mantissa
+ *
+ * Details analogous to the 16-bit format.
+ */
+static __inline GLuint savageEncodeFloat24( GLdouble x )
+{
+    /* Clamp to [0,1] (NaN -> 0) so the integer conversion can't overflow. */
+    const GLdouble c = x > 1.0 ? 1.0 : x > 0.0 ? x : 0.0;
+    int64_t r = (int64_t)(c * ((int64_t)1 << (19+32))); /* 51 fraction bits */
+    GLint exp = 0;
+    if (r < 0x80000) /* below 2^-32: no encoding exists, return 0 */
+	return 0;
+    for (; r - 0x80000 > 0x7ffff; r >>= 1) /* shift down to a 19 bit mantissa */
+	exp++;
+    return exp > 0x1f ? 0xffffff : (r - 0x80000) | (exp << 19);
+}
+#define _1 (int64_t)1 /* 64-bit one: the tables below shift by up to 51 bits */
+static __inline GLdouble savageDecodeFloat24( GLuint x )
+{
+    static const GLdouble pow2[32] = { /* value of one mantissa step, per exponent */
+	1.0/(_1<<51), 1.0/(_1<<50), 1.0/(_1<<49), 1.0/(_1<<48),
+	1.0/(_1<<47), 1.0/(_1<<46), 1.0/(_1<<45), 1.0/(_1<<44),
+	1.0/(_1<<43), 1.0/(_1<<42), 1.0/(_1<<41), 1.0/(_1<<40),
+	1.0/(_1<<39), 1.0/(_1<<38), 1.0/(_1<<37), 1.0/(_1<<36),
+	1.0/(_1<<35), 1.0/(_1<<34), 1.0/(_1<<33), 1.0/(_1<<32),
+	1.0/(_1<<31), 1.0/(_1<<30), 1.0/(_1<<29), 1.0/(_1<<28),
+	1.0/(_1<<27), 1.0/(_1<<26), 1.0/(_1<<25), 1.0/(_1<<24),
+	1.0/(_1<<23), 1.0/(_1<<22), 1.0/(_1<<21), 1.0/(_1<<20)
+    };
+    static const GLdouble bias[32] = { /* value at mantissa 0, per exponent */
+	1.0/(_1<<32), 1.0/(_1<<31), 1.0/(_1<<30), 1.0/(_1<<29),
+	1.0/(_1<<28), 1.0/(_1<<27), 1.0/(_1<<26), 1.0/(_1<<25),
+	1.0/(_1<<24), 1.0/(_1<<23), 1.0/(_1<<22), 1.0/(_1<<21),
+	1.0/(_1<<20), 1.0/(_1<<19), 1.0/(_1<<18), 1.0/(_1<<17),
+	1.0/(_1<<16), 1.0/(_1<<15), 1.0/(_1<<14), 1.0/(_1<<13),
+	1.0/(_1<<12), 1.0/(_1<<11), 1.0/(_1<<10), 1.0/(_1<< 9),
+	1.0/(_1<< 8), 1.0/(_1<< 7), 1.0/(_1<< 6), 1.0/(_1<< 5),
+	1.0/(_1<< 4), 1.0/(_1<< 3), 1.0/(_1<< 2), 1.0/(_1<< 1)
+    };
+    GLuint mant = x & 0x7ffff; /* low 19 bits */
+    GLuint exp = (x >> 19) & 0x1f; /* next 5 bits; higher bits of x are ignored */
+    return bias[exp] + pow2[exp]*mant;
+}
+#undef _1 /* helper macro is local to the tables above */
+
 #endif
diff --git a/src/mesa/drivers/dri/savage/savagestate.c b/src/mesa/drivers/dri/savage/savagestate.c
index d6048291be..ec3b5c4adc 100644
--- a/src/mesa/drivers/dri/savage/savagestate.c
+++ b/src/mesa/drivers/dri/savage/savagestate.c
@@ -470,14 +470,14 @@ static void savageDDDepthFunc_s4(GLcontext *ctx, GLenum func)
      * set up z read/write watermarks register (global)
      */
 
-    switch(func)  { 
+    switch(func)  { /* reversed (see savageCalcViewport) */
     case GL_NEVER: zmode = CF_Never; break;
     case GL_ALWAYS: zmode = CF_Always; break;
-    case GL_LESS: zmode = CF_Less; break; 
-    case GL_LEQUAL: zmode = CF_LessEqual; break;
+    case GL_LESS: zmode = CF_Greater; break; 
+    case GL_LEQUAL: zmode = CF_GreaterEqual; break;
     case GL_EQUAL: zmode = CF_Equal; break;
-    case GL_GREATER: zmode = CF_Greater; break;
-    case GL_GEQUAL: zmode = CF_GreaterEqual; break;
+    case GL_GREATER: zmode = CF_Less; break;
+    case GL_GEQUAL: zmode = CF_LessEqual; break;
     case GL_NOTEQUAL: zmode = CF_NotEqual; break;
     default:return;
     } 
@@ -539,14 +539,14 @@ static void savageDDDepthFunc_s3d(GLcontext *ctx, GLenum func)
      * set up z-buffer offset register (global)
      * set up z read/write watermarks register (global)
      */
-    switch(func)  { 
+    switch(func)  { /* reversed (see savageCalcViewport) */
     case GL_NEVER: zmode = CF_Never; break;
     case GL_ALWAYS: zmode = CF_Always; break;
-    case GL_LESS: zmode = CF_Less; break; 
-    case GL_LEQUAL: zmode = CF_LessEqual; break;
+    case GL_LESS: zmode = CF_Greater; break; 
+    case GL_LEQUAL: zmode = CF_GreaterEqual; break;
     case GL_EQUAL: zmode = CF_Equal; break;
-    case GL_GREATER: zmode = CF_Greater; break;
-    case GL_GEQUAL: zmode = CF_GreaterEqual; break;
+    case GL_GREATER: zmode = CF_Less; break;
+    case GL_GEQUAL: zmode = CF_LessEqual; break;
     case GL_NOTEQUAL: zmode = CF_NotEqual; break;
     default:return;
     } 
@@ -716,14 +716,22 @@ static void savageCalcViewport( GLcontext *ctx )
    const GLfloat *v = ctx->Viewport._WindowMap.m;
    GLfloat *m = imesa->hw_viewport;
 
-   /* See also mga_translate_vertex.
-    */
    m[MAT_SX] =   v[MAT_SX];
    m[MAT_TX] =   v[MAT_TX] + imesa->drawX + SUBPIXEL_X;
    m[MAT_SY] = - v[MAT_SY];
    m[MAT_TY] = - v[MAT_TY] + imesa->driDrawable->h + imesa->drawY + SUBPIXEL_Y;
-   m[MAT_SZ] =   v[MAT_SZ] * imesa->depth_scale;
-   m[MAT_TZ] =   v[MAT_TZ] * imesa->depth_scale;
+   /* Depth range is reversed (far: 0, near: 1) so that float depth
+    * compensates for loss of accuracy of far coordinates. */
+   if (imesa->float_depth && imesa->savageScreen->zpp == 2) {
+       /* The Savage 16-bit floating point depth format can't encode
+	* numbers < 2^-16. Make sure all depth values stay greater
+	* than that. */
+       m[MAT_SZ] = - v[MAT_SZ] * imesa->depth_scale * (65535.0/65536.0);
+       m[MAT_TZ] = 1.0 - v[MAT_TZ] * imesa->depth_scale * (65535.0/65536.0);
+   } else {
+       m[MAT_SZ] = - v[MAT_SZ] * imesa->depth_scale;
+       m[MAT_TZ] = 1.0 - v[MAT_TZ] * imesa->depth_scale;
+   }
 
    imesa->SetupNewInputs = ~0;
 }
@@ -1612,7 +1620,14 @@ static void savageDDInitState_s4( savageContextPtr imesa )
 
     imesa->regs.s4.zBufCtrl.ni.zCmpFunc = CF_Less;
     imesa->regs.s4.zBufCtrl.ni.wToZEn               = GL_TRUE;
-    /*imesa->regs.s4.ZBufCtrl.ni.floatZEn          = GL_TRUE;*/
+    if (imesa->float_depth) {
+	imesa->regs.s4.zBufCtrl.ni.zExpOffset =
+	    imesa->savageScreen->zpp == 2 ? 16 : 32;
+	imesa->regs.s4.zBufCtrl.ni.floatZEn = GL_TRUE;
+    } else {
+	imesa->regs.s4.zBufCtrl.ni.zExpOffset = 0;
+	imesa->regs.s4.zBufCtrl.ni.floatZEn = GL_FALSE;
+    }
     imesa->regs.s4.texBlendCtrl[0].ui            = TBC_NoTexMap;
     imesa->regs.s4.texBlendCtrl[1].ui            = TBC_NoTexMap1;
     imesa->regs.s4.drawCtrl0.ui         = 0;