1 files changed, 1024 insertions, 701 deletions
diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc
index 768cef5474..42a5d723b4 100755..100644
--- a/src/mesa/shader/slang/library/slang_common_builtin.gc
+++ b/src/mesa/shader/slang/library/slang_common_builtin.gc
@@ -26,6 +26,8 @@
 // From Shader Spec, ver. 1.10, rev. 59
 //
 
+//bp: XXX these will probably go away since the value needs to be
+//determined at runtime and may vary from one GLcontext to another...
 const int gl_MaxLights = 8;
 const int gl_MaxClipPlanes = 6;
 const int gl_MaxTextureUnits = 8;
@@ -45,6 +47,7 @@ uniform mat4 gl_ModelViewProjectionMatrix;
 uniform mat4 gl_TextureMatrix[gl_MaxTextureCoords];
 
 uniform mat3 gl_NormalMatrix;
+uniform mat3 __NormalMatrixTranspose;  // Mesa only
 
 uniform mat4 gl_ModelViewMatrixInverse;
 uniform mat4 gl_ProjectionMatrixInverse;
@@ -155,128 +158,159 @@ struct gl_FogParameters {
 
 uniform gl_FogParameters gl_Fog;
 
+
+
+
+
 //
 // 8.1 Angle and Trigonometry Functions
 //
 
-float radians (float deg) {
-    return 3.141593 * deg / 180.0;
+//// radians
+
+float radians(const float deg)
+{
+   const float c = 3.1415926 / 180.0;
+   __asm vec4_multiply __retVal.x, deg, c;
 }
 
-vec2 radians (vec2 deg) {
-    return vec2 (3.141593) * deg / vec2 (180.0);
+vec2 radians(const vec2 deg)
+{
+   const float c = 3.1415926 / 180.0;
+   __asm vec4_multiply __retVal.xy, deg.xy, c.xx;
 }
 
-vec3 radians (vec3 deg) {
-    return vec3 (3.141593) * deg / vec3 (180.0);
+vec3 radians(const vec3 deg)
+{
+   const float c = 3.1415926 / 180.0;
+   __asm vec4_multiply __retVal.xyz, deg.xyz, c.xxx;
 }
 
-vec4 radians (vec4 deg) {
-    return vec4 (3.141593) * deg / vec4 (180.0);
+vec4 radians(const vec4 deg)
+{
+   const float c = 3.1415926 / 180.0;
+   __asm vec4_multiply __retVal, deg, c.xxxx;
 }
 
-float degrees (float rad) {
-    return 180.0 * rad / 3.141593;
+
+//// degrees
+
+float degrees(const float rad)
+{
+   const float c = 180.0 / 3.1415926;
+   __asm vec4_multiply __retVal.x, rad, c;
 }
 
-vec2 degrees (vec2 rad) {
-    return vec2 (180.0) * rad / vec2 (3.141593);
+vec2 degrees(const vec2 rad)  // component-wise radians -> degrees
+{
+   const float c = 180.0 / 3.1415926;  // degrees per radian, same factor as the scalar overload
+   __asm vec4_multiply __retVal.xy, rad.xy, c.xx;  // retVal.xy = rad.xy * c
 }
 
-vec3 degrees (vec3 rad) {
-    return vec3 (180.0) * rad / vec3 (3.141593);
+vec3 degrees(const vec3 rad)  // component-wise radians -> degrees
+{
+   const float c = 180.0 / 3.1415926;  // degrees per radian, same factor as the scalar overload
+   __asm vec4_multiply __retVal.xyz, rad.xyz, c.xxx;  // retVal.xyz = rad.xyz * c
 }
 
-vec4 degrees (vec4 rad) {
-    return vec4 (180.0) * rad / vec4 (3.141593);
+vec4 degrees(const vec4 rad)  // component-wise radians -> degrees
+{
+   const float c = 180.0 / 3.1415926;  // degrees per radian, same factor as the scalar overload
+   __asm vec4_multiply __retVal, rad, c.xxxx;  // retVal = rad * c
 }
 
-float sin (float angle) {
-    float x;
-    __asm float_sine x, angle;
-    return x;
+
+//// sin
+
+float sin(const float radians)
+{
+   __asm float_sine __retVal.x, radians;
 }
 
-vec2 sin (vec2 angle) {
-    return vec2 (
-        sin (angle.x),
-        sin (angle.y)
-    );
+vec2 sin(const vec2 radians)
+{
+   __asm float_sine __retVal.x, radians.x;
+   __asm float_sine __retVal.y, radians.y;
 }
 
-vec3 sin (vec3 angle) {
-    return vec3 (
-        sin (angle.x),
-        sin (angle.y),
-        sin (angle.z)
-    );
+vec3 sin(const vec3 radians)
+{
+   __asm float_sine __retVal.x, radians.x;
+   __asm float_sine __retVal.y, radians.y;
+   __asm float_sine __retVal.z, radians.z;
 }
 
-vec4 sin (vec4 angle) {
-    return vec4 (
-        sin (angle.x),
-        sin (angle.y),
-        sin (angle.z),
-        sin (angle.w)
-    );
+vec4 sin(const vec4 radians)
+{
+   __asm float_sine __retVal.x, radians.x;
+   __asm float_sine __retVal.y, radians.y;
+   __asm float_sine __retVal.z, radians.z;
+   __asm float_sine __retVal.w, radians.w;
 }
 
-float cos (float angle) {
-    return sin (angle + 1.5708);
+
+//// cos
+
+float cos(const float radians)
+{
+   __asm float_cosine __retVal.x, radians;
 }
 
-vec2 cos (vec2 angle) {
-    return vec2 (
-        cos (angle.x),
-        cos (angle.y)
-    );
+vec2 cos(const vec2 radians)
+{
+   __asm float_cosine __retVal.x, radians.x;
+   __asm float_cosine __retVal.y, radians.y;
 }
 
-vec3 cos (vec3 angle) {
-    return vec3 (
-        cos (angle.x),
-        cos (angle.y),
-        cos (angle.z)
-    );
+vec3 cos(const vec3 radians)
+{
+   __asm float_cosine __retVal.x, radians.x;
+   __asm float_cosine __retVal.y, radians.y;
+   __asm float_cosine __retVal.z, radians.z;
 }
 
-vec4 cos (vec4 angle) {
-    return vec4 (
-        cos (angle.x),
-        cos (angle.y),
-        cos (angle.z),
-        cos (angle.w)
-    );
+vec4 cos(const vec4 radians)
+{
+   __asm float_cosine __retVal.x, radians.x;
+   __asm float_cosine __retVal.y, radians.y;
+   __asm float_cosine __retVal.z, radians.z;
+   __asm float_cosine __retVal.w, radians.w;
 }
 
-float tan (float angle) {
-    return sin (angle) / cos (angle);
+
+
+//// tan
+
+float tan(const float angle)
+{
+   const float s = sin(angle);
+   const float c = cos(angle);
+   return s / c;
 }
 
-vec2 tan (vec2 angle) {
-    return vec2 (
-        tan (angle.x),
-        tan (angle.y)
-    );
+vec2 tan(const vec2 angle)
+{
+   const vec2 s = sin(angle);
+   const vec2 c = cos(angle);
+   return s / c;
 }
 
-vec3 tan (vec3 angle) {
-    return vec3 (
-        tan (angle.x),
-        tan (angle.y),
-        tan (angle.z)
-    );
+vec3 tan(const vec3 angle)
+{
+   const vec3 s = sin(angle);
+   const vec3 c = cos(angle);
+   return s / c;
 }
 
-vec4 tan (vec4 angle) {
-    return vec4 (
-        tan (angle.x),
-        tan (angle.y),
-        tan (angle.z),
-        tan (angle.w)
-    );
+vec4 tan(const vec4 angle)
+{
+   const vec4 s = sin(angle);
+   const vec4 c = cos(angle);
+   return s / c;
 }
 
+
+
 float asin (float x) {
     float y;
     __asm float_arcsine y, x;
@@ -404,675 +438,872 @@ vec4 atan (vec4 u, vec4 v) {
 // 8.2 Exponential Functions
 //
 
-float pow (float x, float y) {
-    float p;
-    __asm float_power p, x, y;
-    return p;
+//// pow
+
+float pow(const float a, const float b)
+{
+   __asm float_power __retVal.x, a, b;
 }
 
-vec2 pow (vec2 v, vec2 u) {
-    return vec2 (
-        pow (v.x, u.x),
-        pow (v.y, u.y)
-    );
+vec2 pow(const vec2 a, const vec2 b)
+{
+   __asm float_power __retVal.x, a.x, b.x;
+   __asm float_power __retVal.y, a.y, b.y;
 }
 
-vec3 pow (vec3 v, vec3 u) {
-    return vec3 (
-        pow (v.x, u.x),
-        pow (v.y, u.y),
-        pow (v.z, u.z)
-    );
+vec3 pow(const vec3 a, const vec3 b)
+{
+   __asm float_power __retVal.x, a.x, b.x;
+   __asm float_power __retVal.y, a.y, b.y;
+   __asm float_power __retVal.z, a.z, b.z;
 }
 
-vec4 pow (vec4 v, vec4 u) {
-    return vec4 (
-        pow (v.x, u.x),
-        pow (v.y, u.y),
-        pow (v.z, u.z),
-        pow (v.w, u.w)
-    );
+vec4 pow(const vec4 a, const vec4 b)
+{
+   __asm float_power __retVal.x, a.x, b.x;
+   __asm float_power __retVal.y, a.y, b.y;
+   __asm float_power __retVal.z, a.z, b.z;
+   __asm float_power __retVal.w, a.w, b.w;
 }
 
-float exp (float x) {
-    return pow (2.71828183, x);
+
+//// exp
+
+float exp(const float a)
+{
+   __asm float_exp __retVal.x, a;
 }
 
-vec2 exp (vec2 v) {
-    return pow (vec2 (2.71828183), v);
+vec2 exp(const vec2 a)
+{
+   __asm float_exp __retVal.x, a.x;
+   __asm float_exp __retVal.y, a.y;
 }
 
-vec3 exp (vec3 v) {
-    return pow (vec3 (2.71828183), v);
+vec3 exp(const vec3 a)
+{
+   __asm float_exp __retVal.x, a.x;
+   __asm float_exp __retVal.y, a.y;
+   __asm float_exp __retVal.z, a.z;
 }
 
-vec4 exp (vec4 v) {
-    return pow (vec4 (2.71828183), v);
+vec4 exp(const vec4 a)
+{
+   __asm float_exp __retVal.x, a.x;
+   __asm float_exp __retVal.y, a.y;
+   __asm float_exp __retVal.z, a.z;
+   __asm float_exp __retVal.w, a.w;
 }
 
-float log2 (float x) {
-    float y;
-    __asm float_log2 y, x;
-    return y;
+
+
+//// log2
+
+float log2(const float x)
+{
+   __asm float_log2 __retVal.x, x;
 }
 
-vec2 log2 (vec2 v) {
-    return vec2 (
-        log2 (v.x),
-        log2 (v.y)
-    );
+vec2 log2(const vec2 v)
+{
+   __asm float_log2 __retVal.x, v.x;
+   __asm float_log2 __retVal.y, v.y;
 }
 
-vec3 log2 (vec3 v) {
-    return vec3 (
-        log2 (v.x),
-        log2 (v.y),
-        log2 (v.z)
-    );
+vec3 log2(const vec3 v)
+{
+   __asm float_log2 __retVal.x, v.x;
+   __asm float_log2 __retVal.y, v.y;
+   __asm float_log2 __retVal.z, v.z;
 }
 
-vec4 log2 (vec4 v) {
-    return vec4 (
-        log2 (v.x),
-        log2 (v.y),
-        log2 (v.z),
-        log2 (v.w)
-    );
+vec4 log2(const vec4 v)
+{
+   __asm float_log2 __retVal.x, v.x;
+   __asm float_log2 __retVal.y, v.y;
+   __asm float_log2 __retVal.z, v.z;
+   __asm float_log2 __retVal.w, v.w;
 }
 
-float log (float x) {
-    return log2 (x) / log2 (2.71828183);
+
+//// log  (natural log)
+
+float log(const float x)
+{
+   // note:  logBaseB(x) = logBaseN(x) / logBaseN(B)
+   // compute log(x) = log2(x) / log2(e)
+   // c = 1.0 / log2(e) = 0.693147181
+   const float c = 0.693147181;
+   return log2(x) * c;
 }
 
-vec2 log (vec2 v) {
-    return log2 (v) / log2 (vec2 (2.71828183));
+vec2 log(const vec2 v)
+{
+   const float c = 0.693147181;
+   return log2(v) * c;
 }
 
-vec3 log (vec3 v) {
-    return log2 (v) / log2 (vec3 (2.71828183));
+vec3 log(const vec3 v)
+{
+   const float c = 0.693147181;
+   return log2(v) * c;
 }
 
-vec4 log (vec4 v) {
-    return log2 (v) / log2 (vec4 (2.71828183));
+vec4 log(const vec4 v)
+{
+   const float c = 0.693147181;
+   return log2(v) * c;
 }
 
-float exp2 (float x) {
-    return pow (2.0, x);
+
+//// exp2
+
+float exp2(const float a)
+{
+   __asm float_exp2 __retVal.x, a;
 }
 
-vec2 exp2 (vec2 v) {
-    return pow (vec2 (2.0), v);
+vec2 exp2(const vec2 a)
+{
+   __asm float_exp2 __retVal.x, a.x;
+   __asm float_exp2 __retVal.y, a.y;
 }
 
-vec3 exp2 (vec3 v) {
-    return pow (vec3 (2.0), v);
+vec3 exp2(const vec3 a)
+{
+   __asm float_exp2 __retVal.x, a.x;
+   __asm float_exp2 __retVal.y, a.y;
+   __asm float_exp2 __retVal.z, a.z;
 }
 
-vec4 exp2 (vec4 v) {
-    return pow (vec4 (2.0), v);
+vec4 exp2(const vec4 a)
+{
+   __asm float_exp2 __retVal.x, a.x;
+   __asm float_exp2 __retVal.y, a.y;
+   __asm float_exp2 __retVal.z, a.z;
+   __asm float_exp2 __retVal.w, a.w;
 }
 
-float sqrt (float x) {
-    return pow (x, 0.5);
+
+//// sqrt
+
+float sqrt(const float x)  // square root built from reciprocal-sqrt followed by reciprocal
+{
+   float r;
+   __asm float_rsq r, x;           // r = 1 / sqrt(x)
+   __asm float_rcp __retVal.x, r;  // retVal = 1 / r
 }
 
-vec2 sqrt (vec2 v) {
-    return pow (v, vec2 (0.5));
+vec2 sqrt(const vec2 v)
+{
+   float r;
+   __asm float_rsq r, v.x;
+   __asm float_rcp __retVal.x, r;
+   __asm float_rsq r, v.y;
+   __asm float_rcp __retVal.y, r;
 }
 
-vec3 sqrt (vec3 v) {
-    return pow (v, vec3 (0.5));
+vec3 sqrt(const vec3 v)
+{
+   float r;
+   __asm float_rsq r, v.x;
+   __asm float_rcp __retVal.x, r;
+   __asm float_rsq r, v.y;
+   __asm float_rcp __retVal.y, r;
+   __asm float_rsq r, v.z;
+   __asm float_rcp __retVal.z, r;
 }
 
-vec4 sqrt (vec4 v) {
-    return pow (v, vec4 (0.5));
+vec4 sqrt(const vec4 v)
+{
+   float r;
+   __asm float_rsq r, v.x;
+   __asm float_rcp __retVal.x, r;
+   __asm float_rsq r, v.y;
+   __asm float_rcp __retVal.y, r;
+   __asm float_rsq r, v.z;
+   __asm float_rcp __retVal.z, r;
+   __asm float_rsq r, v.w;
+   __asm float_rcp __retVal.w, r;
 }
 
-float inversesqrt (float x) {
-    return 1.0 / sqrt (x);
+
+//// inversesqrt
+
+float inversesqrt(const float x)
+{
+   __asm float_rsq __retVal.x, x;
+}
+
+vec2 inversesqrt(const vec2 v)
+{
+   __asm float_rsq __retVal.x, v.x;
+   __asm float_rsq __retVal.y, v.y;
 }
 
-vec2 inversesqrt (vec2 v) {
-    return vec2 (1.0) / sqrt (v);
+vec3 inversesqrt(const vec3 v)
+{
+   __asm float_rsq __retVal.x, v.x;
+   __asm float_rsq __retVal.y, v.y;
+   __asm float_rsq __retVal.z, v.z;
 }
 
-vec3 inversesqrt (vec3 v) {
-    return vec3 (1.0) / sqrt (v);
+vec4 inversesqrt(const vec4 v)
+{
+   __asm float_rsq __retVal.x, v.x;
+   __asm float_rsq __retVal.y, v.y;
+   __asm float_rsq __retVal.z, v.z;
+   __asm float_rsq __retVal.w, v.w;
+}
+
+
+//// normalize
+
+float normalize(const float x)
+{
+   __retVal.x = 1.0;
 }
 
-vec4 inversesqrt (vec4 v) {
-    return vec4 (1.0) / sqrt (v);
+vec2 normalize(const vec2 v)
+{
+   const float s = inversesqrt(dot(v, v));
+   __asm vec4_multiply __retVal.xy, v, s.xx;
 }
 
+vec3 normalize(const vec3 v)  // v scaled by 1 / sqrt(dot(v, v))
+{
+//   const float s = inversesqrt(dot(v, v));
+//   __retVal = v * s;
+// XXX note, we _could_ use __retVal.w instead of tmp and save a
+// register, but that's actually a compilation error because v is a vec3
+// and the .w suffix is illegal.  Oh well.
+   float tmp;
+   __asm vec3_dot tmp, v, v;                      // tmp = dot(v, v)
+   __asm float_rsq tmp, tmp;                      // tmp = 1 / sqrt(tmp)
+   __asm vec4_multiply __retVal.xyz, v, tmp.xxx;  // retVal.xyz = v * tmp
+}
+
+vec4 normalize(const vec4 v)  // v scaled by 1 / sqrt(dot(v, v)), all four components
+{
+   float tmp;
+   __asm vec4_dot tmp, v, v;                   // tmp = dot(v, v)
+   __asm float_rsq tmp, tmp;                   // tmp = 1 / sqrt(tmp)
+   __asm vec4_multiply __retVal, v, tmp.xxxx;  // write x, y, z and w so no component is left unset
+}
+
+
+
 //
 // 8.3 Common Functions
 //
 
-float abs (float x) {
-    return x >= 0.0 ? x : -x;
+
+//// abs
+
+float abs(const float a)
+{
+   __asm vec4_abs __retVal.x, a;
 }
 
-vec2 abs (vec2 v) {
-    return vec2 (
-        abs (v.x),
-        abs (v.y)
-    );
+vec2 abs(const vec2 a)
+{
+   __asm vec4_abs __retVal.xy, a;
 }
 
-vec3 abs (vec3 v) {
-    return vec3 (
-        abs (v.x),
-        abs (v.y),
-        abs (v.z)
-    );
+vec3 abs(const vec3 a)
+{
+   __asm vec4_abs __retVal.xyz, a;
 }
 
-vec4 abs (vec4 v) {
-    return vec4 (
-        abs (v.x),
-        abs (v.y),
-        abs (v.z),
-        abs (v.w)
-    );
+vec4 abs(const vec4 a)
+{
+   __asm vec4_abs __retVal, a;
 }
 
-float sign (float x) {
-    return x > 0.0 ? 1.0 : x < 0.0 ? -1.0 : 0.0;
+
+//// sign
+
+float sign(const float x)
+{
+   float p, n;
+   __asm vec4_sgt p.x, x, 0.0;            // p = (x > 0)
+   __asm vec4_sgt n.x, 0.0, x;            // n = (x < 0)
+   __asm vec4_subtract __retVal.x, p, n;  // sign = p - n
 }
 
-vec2 sign (vec2 v) {
-    return vec2 (
-        sign (v.x),
-        sign (v.y)
-    );
+vec2 sign(const vec2 v)
+{
+   vec2 p, n;
+   __asm vec4_sgt p.xy, v, 0.0;
+   __asm vec4_sgt n.xy, 0.0, v;
+   __asm vec4_subtract __retVal.xy, p, n;
 }
 
-vec3 sign (vec3 v) {
-    return vec3 (
-        sign (v.x),
-        sign (v.y),
-        sign (v.z)
-    );
+vec3 sign(const vec3 v)
+{
+   vec3 p, n;
+   __asm vec4_sgt p.xyz, v, 0.0;
+   __asm vec4_sgt n.xyz, 0.0, v;
+   __asm vec4_subtract __retVal.xyz, p, n;
 }
 
-vec4 sign (vec4 v) {
-    return vec4 (
-        sign (v.x),
-        sign (v.y),
-        sign (v.z),
-        sign (v.w)
-    );
+vec4 sign(const vec4 v)
+{
+   vec4 p, n;
+   __asm vec4_sgt p, v, 0.0;
+   __asm vec4_sgt n, 0.0, v;
+   __asm vec4_subtract __retVal, p, n;
 }
 
-float floor (float x) {
-    float y;
-    __asm float_floor y, x;
-    return y;
+
+//// floor
+
+float floor(const float a)
+{
+   __asm vec4_floor __retVal.x, a;
 }
 
-vec2 floor (vec2 v) {
-    return vec2 (
-        floor (v.x),
-        floor (v.y)
-    );
+vec2 floor(const vec2 a)
+{
+   __asm vec4_floor __retVal.xy, a;
 }
 
-vec3 floor (vec3 v) {
-    return vec3 (
-        floor (v.x),
-        floor (v.y),
-        floor (v.z)
-    );
+vec3 floor(const vec3 a)
+{
+   __asm vec4_floor __retVal.xyz, a;
 }
 
-vec4 floor (vec4 v) {
-    return vec4 (
-        floor (v.x),
-        floor (v.y),
-        floor (v.z),
-        floor (v.w)
-    );
+vec4 floor(const vec4 a)
+{
+   __asm vec4_floor __retVal, a;
 }
 
-float ceil (float x) {
-    float y;
-    __asm float_ceil y, x;
-    return y;
+
+//// ceil
+
+float ceil(const float a)  // computed as -floor(-a)
+{
+   // XXX this could be improved
+   float b = -a;            // negate the input
+   __asm vec4_floor b, b;   // b = floor(-a)
+   __retVal.x = -b;         // negate back: -floor(-a)
 }
 
-vec2 ceil (vec2 v) {
-    return vec2 (
-        ceil (v.x),
-        ceil (v.y)
-    );
+vec2 ceil(const vec2 a)
+{
+   vec2 b = -a;
+   __asm vec4_floor b, b;
+   __retVal.xy = -b;
 }
 
-vec3 ceil (vec3 v) {
-    return vec3 (
-        ceil (v.x),
-        ceil (v.y),
-        ceil (v.z)
-    );
+vec3 ceil(const vec3 a)
+{
+   vec3 b = -a;
+   __asm vec4_floor b, b;
+   __retVal.xyz = -b;
 }
 
-vec4 ceil (vec4 v) {
-    return vec4 (
-        ceil (v.x),
-        ceil (v.y),
-        ceil (v.z),
-        ceil (v.w)
-    );
+vec4 ceil(const vec4 a)
+{
+   vec4 b = -a;
+   __asm vec4_floor b, b;
+   __retVal = -b;
 }
 
-float fract (float x) {
-    return x - floor (x);
+
+//// fract
+
+float fract(const float a)
+{
+   __asm vec4_frac __retVal.x, a;
 }
 
-vec2 fract (vec2 v) {
-    return v - floor (v);
+vec2 fract(const vec2 a)
+{
+   __asm vec4_frac __retVal.xy, a;
 }
 
-vec3 fract (vec3 v) {
-    return v - floor (v);
+vec3 fract(const vec3 a)
+{
+   __asm vec4_frac __retVal.xyz, a;
 }
 
-vec4 fract (vec4 v) {
-    return v - floor (v);
+vec4 fract(const vec4 a)
+{
+   __asm vec4_frac __retVal, a;
 }
 
-float mod (float x, float y) {
-    return x - y * floor (x / y);
+
+//// mod  (very untested!)
+
+float mod(const float a, const float b)
+{
+    float oneOverB;
+    __asm float_rcp oneOverB, b;
+    __retVal.x = a - b * floor(a * oneOverB);
 }
 
-vec2 mod (vec2 v, float u) {
-    return v - u * floor (v / u);
+vec2 mod(const vec2 a, const float b)
+{
+    float oneOverB;
+    __asm float_rcp oneOverB, b;
+    __retVal.xy = a - b * floor(a * oneOverB);
 }
 
-vec3 mod (vec3 v, float u) {
-    return v - u * floor (v / u);
+vec3 mod(const vec3 a, const float b)
+{
+    float oneOverB;
+    __asm float_rcp oneOverB, b;
+    __retVal.xyz = a - b * floor(a * oneOverB);
 }
 
-vec4 mod (vec4 v, float u) {
-    return v - u * floor (v / u);
+vec4 mod(const vec4 a, const float b)
+{
+    float oneOverB;
+    __asm float_rcp oneOverB, b;
+    __retVal = a - b * floor(a * oneOverB);
 }
 
-vec2 mod (vec2 v, vec2 u) {
-    return v - u * floor (v / u);
+vec2 mod(const vec2 a, const vec2 b)
+{
+    float oneOverBx, oneOverBy;
+    __asm float_rcp oneOverBx, b.x;
+    __asm float_rcp oneOverBy, b.y;
+    __retVal.x = a.x - b.x * floor(a.x * oneOverBx);
+    __retVal.y = a.y - b.y * floor(a.y * oneOverBy);
 }
 
-vec3 mod (vec3 v, vec3 u) {
-    return v - u * floor (v / u);
+vec3 mod(const vec3 a, const vec3 b)
+{
+    float oneOverBx, oneOverBy, oneOverBz;
+    __asm float_rcp oneOverBx, b.x;
+    __asm float_rcp oneOverBy, b.y;
+    __asm float_rcp oneOverBz, b.z;
+    __retVal.x = a.x - b.x * floor(a.x * oneOverBx);
+    __retVal.y = a.y - b.y * floor(a.y * oneOverBy);
+    __retVal.z = a.z - b.z * floor(a.z * oneOverBz);
 }
 
-vec4 mod (vec4 v, vec4 u) {
-    return v - u * floor (v / u);
+vec4 mod(const vec4 a, const vec4 b)  // component-wise a - b * floor(a / b)
+{
+    float oneOverBx, oneOverBy, oneOverBz, oneOverBw;
+    __asm float_rcp oneOverBx, b.x;  // per-component reciprocals: a / b is evaluated as a * (1 / b)
+    __asm float_rcp oneOverBy, b.y;
+    __asm float_rcp oneOverBz, b.z;
+    __asm float_rcp oneOverBw, b.w;
+    __retVal.x = a.x - b.x * floor(a.x * oneOverBx);
+    __retVal.y = a.y - b.y * floor(a.y * oneOverBy);
+    __retVal.z = a.z - b.z * floor(a.z * oneOverBz);
+    __retVal.w = a.w - b.w * floor(a.w * oneOverBw);  // w must use 1 / b.w, not 1 / b.z
 }
 
-float min (float x, float y) {
-    return x < y ? x : y;
+
+//// min
+
+float min(const float a, const float b)
+{
+   __asm vec4_min __retVal.x, a.x, b.x;
 }
 
-vec2 min (vec2 v, vec2 u) {
-    return vec2 (
-        min (v.x, u.x),
-        min (v.y, u.y)
-    );
+vec2 min(const vec2 a, const vec2 b)
+{
+   __asm vec4_min __retVal.xy, a.xy, b.xy;
 }
 
-vec3 min (vec3 v, vec3 u) {
-    return vec3 (
-        min (v.x, u.x),
-        min (v.y, u.y),
-        min (v.z, u.z)
-    );
+vec3 min(const vec3 a, const vec3 b)
+{
+   __asm vec4_min __retVal.xyz, a.xyz, b.xyz;
 }
 
-vec4 min (vec4 v, vec4 u) {
-    return vec4 (
-        min (v.x, u.x),
-        min (v.y, u.y),
-        min (v.z, u.z),
-        min (v.w, u.w)
-    );
+vec4 min(const vec4 a, const vec4 b)
+{
+   __asm vec4_min __retVal, a, b;
 }
 
-vec2 min (vec2 v, float y) {
-    return min (v, vec2 (y));
+vec2 min(const vec2 a, const float b)
+{
+   __asm vec4_min __retVal, a.xy, b.xx;
 }
 
-vec3 min (vec3 v, float y) {
-    return min (v, vec3 (y));
+vec3 min(const vec3 a, const float b)
+{
+   __asm vec4_min __retVal, a.xyz, b.xxx;
 }
 
-vec4 min (vec4 v, float y) {
-    return min (v, vec4 (y));
+vec4 min(const vec4 a, const float b)
+{
+   __asm vec4_min __retVal, a, b.xxxx;
 }
 
-float max (float x, float y) {
-    return x < y ? y : x;
+
+//// max
+
+float max(const float a, const float b)
+{
+   __asm vec4_max __retVal.x, a.x, b.x;
 }
 
-vec2 max (vec2 v, vec2 u) {
-    return vec2 (
-        max (v.x, u.x),
-        max (v.y, u.y)
-    );
+vec2 max(const vec2 a, const vec2 b)
+{
+   __asm vec4_max __retVal.xy, a.xy, b.xy;
 }
 
-vec3 max (vec3 v, vec3 u) {
-    return vec3 (
-        max (v.x, u.x),
-        max (v.y, u.y),
-        max (v.z, u.z)
-    );
+vec3 max(const vec3 a, const vec3 b)
+{
+   __asm vec4_max __retVal.xyz, a.xyz, b.xyz;
 }
 
-vec4 max (vec4 v, vec4 u) {
-    return vec4 (
-        max (v.x, u.x),
-        max (v.y, u.y),
-        max (v.z, u.z),
-        max (v.w, u.w)
-    );
+vec4 max(const vec4 a, const vec4 b)
+{
+   __asm vec4_max __retVal, a, b;
 }
 
-vec2 max (vec2 v, float y) {
-    return max (v, vec2 (y));
+vec2 max(const vec2 a, const float b)
+{
+   __asm vec4_max __retVal, a.xy, b.xx;
 }
 
-vec3 max (vec3 v, float y) {
-    return max (v, vec3 (y));
+vec3 max(const vec3 a, const float b)
+{
+   __asm vec4_max __retVal, a.xyz, b.xxx;
 }
 
-vec4 max (vec4 v, float y) {
-    return max (v, vec4 (y));
+vec4 max(const vec4 a, const float b)
+{
+   __asm vec4_max __retVal, a, b.xxxx;
 }
 
-float clamp (float x, float minVal, float maxVal) {
-    return min (max (x, minVal), maxVal);
+
+//// clamp
+
+float clamp(const float val, const float minVal, const float maxVal)
+{
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
 }
 
-vec2 clamp (vec2 x, float minVal, float maxVal) {
-    return min (max (x, minVal), maxVal);
+vec2 clamp(const vec2 val, const float minVal, const float maxVal)
+{
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
 }
 
-vec3 clamp (vec3 x, float minVal, float maxVal) {
-    return min (max (x, minVal), maxVal);
+vec3 clamp(const vec3 val, const float minVal, const float maxVal)
+{
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
 }
 
-vec4 clamp (vec4 x, float minVal, float maxVal) {
-    return min (max (x, minVal), maxVal);
+vec4 clamp(const vec4 val, const float minVal, const float maxVal)
+{
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
 }
 
-vec2 clamp (vec2 x, vec2 minVal, vec2 maxVal) {
-    return min (max (x, minVal), maxVal);
+vec2 clamp(const vec2 val, const vec2 minVal, const vec2 maxVal)
+{
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
 }
 
-vec3 clamp (vec3 x, vec3 minVal, vec3 maxVal) {
-    return min (max (x, minVal), maxVal);
+vec3 clamp(const vec3 val, const vec3 minVal, const vec3 maxVal)
+{
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
 }
 
-vec4 clamp (vec4 x, vec4 minVal, vec4 maxVal) {
-    return min (max (x, minVal), maxVal);
+vec4 clamp(const vec4 val, const vec4 minVal, const vec4 maxVal)
+{
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
 }
 
-float mix (float x, float y, float a) {
-    return x * (1.0 - a) + y * a;
+
+//// mix
+
+float mix(const float x, const float y, const float a)
+{
+   __asm vec4_lrp __retVal, a, y, x;
 }
 
-vec2 mix (vec2 x, vec2 y, float a) {
-    return x * (1.0 - a) + y * a;
+vec2 mix(const vec2 x, const vec2 y, const float a)
+{
+   __asm vec4_lrp __retVal, a, y, x;
 }
 
-vec3 mix (vec3 x, vec3 y, float a) {
-    return x * (1.0 - a) + y * a;
+vec3 mix(const vec3 x, const vec3 y, const float a)
+{
+   __asm vec4_lrp __retVal, a, y, x;
 }
 
-vec4 mix (vec4 x, vec4 y, float a) {
-    return x * (1.0 - a) + y * a;
+vec4 mix(const vec4 x, const vec4 y, const float a)
+{
+   __asm vec4_lrp __retVal, a, y, x;
 }
 
-vec2 mix (vec2 x, vec2 y, vec2 a) {
-    return x * (1.0 - a) + y * a;
+vec2 mix(const vec2 x, const vec2 y, const vec2 a)
+{
+   __asm vec4_lrp __retVal, a, y, x;
 }
 
-vec3 mix (vec3 x, vec3 y, vec3 a) {
-    return x * (1.0 - a) + y * a;
+vec3 mix(const vec3 x, const vec3 y, const vec3 a)
+{
+   __asm vec4_lrp __retVal, a, y, x;
 }
 
-vec4 mix (vec4 x, vec4 y, vec4 a) {
-    return x * (1.0 - a) + y * a;
+vec4 mix(const vec4 x, const vec4 y, const vec4 a)
+{
+   __asm vec4_lrp __retVal, a, y, x;
 }
 
-float step (float edge, float x) {
-    return x < edge ? 0.0 : 1.0;
+
+//// step (untested)
+
+float step(const float edge, const float x)  // 0.0 if x < edge, otherwise 1.0
+{
+   __asm vec4_sle __retVal.x, edge, x;  // (edge <= x), so x == edge yields 1.0
 }
 
-vec2 step (vec2 edge, vec2 v) {
-    return vec2 (
-        step (edge.x, v.x),
-        step (edge.y, v.y)
-    );
+vec2 step(const vec2 edge, const vec2 x)  // per component: 0.0 if x < edge, otherwise 1.0
+{
+   __asm vec4_sle __retVal.xy, edge, x;  // (edge <= x), so x == edge yields 1.0
 }
 
-vec3 step (vec3 edge, vec3 v) {
-    return vec3 (
-        step (edge.x, v.x),
-        step (edge.y, v.y),
-        step (edge.z, v.z)
-    );
+vec3 step(const vec3 edge, const vec3 x)  // per component: 0.0 if x < edge, otherwise 1.0
+{
+   __asm vec4_sle __retVal.xyz, edge, x;  // (edge <= x), so x == edge yields 1.0
 }
 
-vec4 step (vec4 edge, vec4 v) {
-    return vec4 (
-        step (edge.x, v.x),
-        step (edge.y, v.y),
-        step (edge.z, v.z),
-        step (edge.w, v.w)
-    );
+vec4 step(const vec4 edge, const vec4 x)  // per component: 0.0 if x < edge, otherwise 1.0
+{
+   __asm vec4_sle __retVal, edge, x;  // (edge <= x), so x == edge yields 1.0
 }
 
-vec2 step (float edge, vec2 v) {
-    return step (vec2 (edge), v);
+vec2 step(const float edge, const vec2 v)  // per component: 0.0 if v < edge, otherwise 1.0
+{
+   __asm vec4_sle __retVal.xy, edge.xx, v;  // (edge <= v) with the scalar edge replicated; v == edge yields 1.0
 }
 
-vec3 step (float edge, vec3 v) {
-    return step (vec3 (edge), v);
+vec3 step(const float edge, const vec3 v)  // per component: 0.0 if v < edge, otherwise 1.0
+{
+   __asm vec4_sle __retVal.xyz, edge.xxx, v;  // (edge <= v) with the scalar edge replicated; v == edge yields 1.0
 }
 
-vec4 step (float edge, vec4 v) {
-    return step (vec4 (edge), v);
+vec4 step(const float edge, const vec4 v)  // per component: 0.0 if v < edge, otherwise 1.0
+{
+   __asm vec4_sle __retVal, edge.xxxx, v;  // (edge <= v) with the scalar edge replicated; v == edge yields 1.0
 }
 
-float smoothstep (float edge0, float edge1, float x) {
-    float t = clamp ((x - edge0) / (edge1 - edge0), 0.0, 1.0);
+
+//// smoothstep (untested)
+
+float smoothstep(const float edge0, const float edge1, const float x)
+{
+    float t = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0);
     return t * t * (3.0 - 2.0 * t);
 }
 
-vec2 smoothstep (vec2 edge0, vec2 edge1, vec2 v) {
-    return vec2 (
-        smoothstep (edge0.x, edge1.x, v.x),
-        smoothstep (edge0.y, edge1.y, v.y)
-    );
+vec2 smoothstep(const vec2 edge0, const vec2 edge1, const vec2 v)
+{
+   vec2 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0);
+   return t * t * (3.0 - 2.0 * t);
 }
 
-vec3 smoothstep (vec3 edge0, vec3 edge1, vec3 v) {
-    return vec3 (
-        smoothstep (edge0.x, edge1.x, v.x),
-        smoothstep (edge0.y, edge1.y, v.y),
-        smoothstep (edge0.z, edge1.z, v.z)
-    );
+vec3 smoothstep(const vec3 edge0, const vec3 edge1, const vec3 v)
+{
+   vec3 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0);
+   return t * t * (3.0 - 2.0 * t);
 }
 
-vec4 smoothstep (vec4 edge0, vec4 edge1, vec4 v) {
-    return vec4 (
-        smoothstep (edge0.x, edge1.x, v.x),
-        smoothstep (edge0.y, edge1.y, v.y),
-        smoothstep (edge0.z, edge1.z, v.z),
-        smoothstep (edge0.w, edge1.w, v.w)
-    );
+vec4 smoothstep(const vec4 edge0, const vec4 edge1, const vec4 v)
+{
+   vec4 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0);
+   return t * t * (3.0 - 2.0 * t);
 }
 
-vec2 smoothstep (float edge0, float edge1, vec2 v) {
-    return vec2 (
-        smoothstep (edge0, edge1, v.x),
-        smoothstep (edge0, edge1, v.y)
-    );
+vec2 smoothstep(const float edge0, const float edge1, const vec2 v)
+{
+   vec2 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0);
+   return t * t * (3.0 - 2.0 * t);
 }
 
-vec3 smoothstep (float edge0, float edge1, vec3 v) {
-    return vec3 (
-        smoothstep (edge0, edge1, v.x),
-        smoothstep (edge0, edge1, v.y),
-        smoothstep (edge0, edge1, v.z)
-    );
+vec3 smoothstep(const float edge0, const float edge1, const vec3 v)
+{
+   vec3 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0);
+   return t * t * (3.0 - 2.0 * t);
 }
 
-vec4 smoothstep (float edge0, float edge1, vec4 v) {
-    return vec4 (
-        smoothstep (edge0, edge1, v.x),
-        smoothstep (edge0, edge1, v.y),
-        smoothstep (edge0, edge1, v.z),
-        smoothstep (edge0, edge1, v.w)
-    );
+vec4 smoothstep(const float edge0, const float edge1, const vec4 v)
+{
+   vec4 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0);
+   return t * t * (3.0 - 2.0 * t);
 }
 
+
+
 //
 // 8.4 Geometric Functions
 //
 
-float dot (float x, float y) {
-    return x * y;
-}
 
-float dot (vec2 v, vec2 u) {
-    return v.x * u.x + v.y * u.y;
-}
+//// length
 
-float dot (vec3 v, vec3 u) {
-    return v.x * u.x + v.y * u.y + v.z * u.z;
+float length(const float x)
+{
+   return abs(x);
 }
 
-float dot (vec4 v, vec4 u) {
-    return v.x * u.x + v.y * u.y + v.z * u.z + v.w * u.w;
+float length(const vec2 v)
+{
+   float r;
+   const float p = dot(v, v);      // p = v.x * v.x + v.y * v.y
+   __asm float_rsq r, p;           // r = 1 / sqrt(p)
+   __asm float_rcp __retVal.x, r;  // retVal = 1 / r
 }
 
-float length (float x) {
-    return sqrt (dot (x, x));
+float length(const vec3 v)
+{
+   float r;
+   const float p = dot(v, v);      // p = v.x * v.x + v.y * v.y + v.z * v.z
+   __asm float_rsq r, p;           // r = 1 / sqrt(p)
+   __asm float_rcp __retVal.x, r;  // retVal = 1 / r
 }
 
-float length (vec2 v) {
-    return sqrt (dot (v, v));
+float length(const vec4 v)
+{
+   float r;
+   const float p = dot(v, v);      // p = v.x * v.x + v.y * v.y + ...
+   __asm float_rsq r, p;           // r = 1 / sqrt(p)
+   __asm float_rcp __retVal.x, r;  // retVal = 1 / r
 }
 
-float length (vec3 v) {
-    return sqrt (dot (v, v));
-}
 
-float length (vec4 v) {
-    return sqrt (dot (v, v));
-}
+//// distance
 
-float distance (float x, float y) {
-    return length (x - y);
+float distance(const float x, const float y)
+{
+   const float d = x - y;
+   __retVal = length(d);
 }
 
-float distance (vec2 v, vec2 u) {
-    return length (v - u);
+float distance(const vec2 v, const vec2 u)
+{
+   const vec2 d2 = v - u;
+   __retVal = length(d2);
 }
 
-float distance (vec3 v, vec3 u) {
-    return length (v - u);
+float distance(const vec3 v, const vec3 u)
+{
+   const vec3 d3 = v - u;
+   __retVal = length(d3);
 }
 
-float distance (vec4 v, vec4 u) {
-    return length (v - u);
+float distance(const vec4 v, const vec4 u)
+{
+   const vec4 d4 = v - u;
+   __retVal = length(d4);
 }
 
-vec3 cross (vec3 v, vec3 u) {
-    return vec3 (
-        v.y * u.z - u.y * v.z,
-        v.z * u.x - u.z * v.x,
-        v.x * u.y - u.x * v.y
-    );
-}
 
-float normalize (float x) {
-    return 1.0;
-}
+//// cross
 
-vec2 normalize (vec2 v) {
-    return v / length (v);
+vec3 cross(const vec3 v, const vec3 u)
+{
+   __asm vec3_cross __retVal.xyz, v, u;
 }
 
-vec3 normalize (vec3 v) {
-    return v / length (v);
-}
 
-vec4 normalize (vec4 v) {
-    return v / length (v);
-}
+//// faceforward
 
-float faceforward (float N, float I, float Nref) {
-    return dot (Nref, I) < 0.0 ? N : -N;
+float faceforward(const float N, const float I, const float Nref)
+{
+    // this could probably be done better
+    const float d = dot(Nref, I);
+    float s;
+    __asm vec4_sgt s.x, 0.0, d;  // s = (0.0 > d) ? 1 : 0
+    return mix(-N, N, s);
 }
 
-vec2 faceforward (vec2 N, vec2 I, vec2 Nref) {
-    return dot (Nref, I) < 0.0 ? N : -N;
+vec2 faceforward(const vec2 N, const vec2 I, const vec2 Nref)
+{
+    // this could probably be done better
+    const float d = dot(Nref, I);
+    float s;
+    __asm vec4_sgt s.x, 0.0, d;  // s = (0.0 > d) ? 1 : 0
+    return mix(-N, N, s);
 }
 
-vec3 faceforward (vec3 N, vec3 I, vec3 Nref) {
-    return dot (Nref, I) < 0.0 ? N : -N;
+vec3 faceforward(const vec3 N, const vec3 I, const vec3 Nref)
+{
+    // this could probably be done better
+    const float d = dot(Nref, I);
+    float s;
+    __asm vec4_sgt s.x, 0.0, d;  // s = (0.0 > d) ? 1 : 0
+    return mix(-N, N, s);
 }
 
-vec4 faceforward (vec4 N, vec4 I, vec4 Nref) {
-    return dot (Nref, I) < 0.0 ? N : -N;
+vec4 faceforward(const vec4 N, const vec4 I, const vec4 Nref)
+{
+    // this could probably be done better
+    const float d = dot(Nref, I);
+    float s;
+    __asm vec4_sgt s.x, 0.0, d;  // s = (0.0 > d) ? 1 : 0
+    return mix(-N, N, s);
 }
 
-float reflect (float I, float N) {
-    return I - 2.0 * dot (N, I) * N;
+
+//// reflect
+
+float reflect(const float I, const float N)
+{
+   return I - 2.0 * dot(N, I) * N;
 }
 
-vec2 reflect (vec2 I, vec2 N) {
-    return I - 2.0 * dot (N, I) * N;
+vec2 reflect(const vec2 I, const vec2 N)
+{
+   return I - 2.0 * dot(N, I) * N;
 }
 
-vec3 reflect (vec3 I, vec3 N) {
-    return I - 2.0 * dot (N, I) * N;
+vec3 reflect(const vec3 I, const vec3 N)
+{
+   return I - 2.0 * dot(N, I) * N;
 }
 
-vec4 reflect (vec4 I, vec4 N) {
-    return I - 2.0 * dot (N, I) * N;
+vec4 reflect(const vec4 I, const vec4 N)
+{
+   return I - 2.0 * dot(N, I) * N;
 }
 
-float refract (float I, float N, float eta) {
-    float k = 1.0 - eta * eta * (1.0 - dot (N, I) * dot (N, I));
-    if (k < 0.0)
-        return 0.0;
-    return eta * I - (eta * dot (N, I) + sqrt (k)) * N;
+//// refract
+
+float refract(const float I, const float N, const float eta)
+{
+   float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I));
+   if (k < 0.0)
+       return 0.0;
+   return eta * I - (eta * dot(N, I) + sqrt(k)) * N;
 }
 
-vec2 refract (vec2 I, vec2 N, float eta) {
-    float k = 1.0 - eta * eta * (1.0 - dot (N, I) * dot (N, I));
-    if (k < 0.0)
-        return 0.0;
-    return eta * I - (eta * dot (N, I) + sqrt (k)) * N;
+vec2 refract(const vec2 I, const vec2 N, const float eta)  // refraction of I about N for ratio eta
+{
+   float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I));
+   if (k < 0.0)
+      return vec2(0.0);  // k < 0: result is the zero vector, constructed with the declared return type
+   return eta * I - (eta * dot(N, I) + sqrt(k)) * N;
 }
 
-vec3 refract (vec3 I, vec3 N, float eta) {
-    float k = 1.0 - eta * eta * (1.0 - dot (N, I) * dot (N, I));
-    if (k < 0.0)
-        return 0.0;
-    return eta * I - (eta * dot (N, I) + sqrt (k)) * N;
+vec3 refract(const vec3 I, const vec3 N, const float eta)  // refraction of I about N for ratio eta
+{
+   float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I));
+   if (k < 0.0)
+      return vec3(0.0);  // k < 0: result is the zero vector, constructed with the declared return type
+   return eta * I - (eta * dot(N, I) + sqrt(k)) * N;
 }
 
-vec4 refract (vec4 I, vec4 N, float eta) {
-    float k = 1.0 - eta * eta * (1.0 - dot (N, I) * dot (N, I));
-    if (k < 0.0)
-        return 0.0;
-    return eta * I - (eta * dot (N, I) + sqrt (k)) * N;
+vec4 refract(const vec4 I, const vec4 N, const float eta)  // refraction of I about N for ratio eta
+{
+   float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I));
+   if (k < 0.0)
+      return vec4(0.0);  // k < 0: result is the zero vector, constructed with the declared return type
+   return eta * I - (eta * dot(N, I) + sqrt(k)) * N;
 }
 
+
+
+
 //
 // 8.5 Matrix Functions
 //
@@ -1089,238 +1320,346 @@ mat4 matrixCompMult (mat4 m, mat4 n) {
     return mat4 (m[0] * n[0], m[1] * n[1], m[2] * n[2], m[3] * n[3]);
 }
 
+
+
+
 //
 // 8.6 Vector Relational Functions
 //
 
-bvec2 lessThan (vec2 v, vec2 u) {
-    return bvec2 (v.x < u.x, v.y < u.y);
+//// lessThan: component-wise (first argument < second argument)
+
+bvec2 lessThan(const vec2 u, const vec2 v)
+{
+   __asm vec4_slt __retVal.xy, u, v;
 }
 
-bvec3 lessThan (vec3 v, vec3 u) {
-    return bvec3 (v.x < u.x, v.y < u.y, v.z < u.z);
+bvec3 lessThan(const vec3 u, const vec3 v)
+{
+   __asm vec4_slt __retVal.xyz, u, v;
 }
 
-bvec4 lessThan (vec4 v, vec4 u) {
-    return bvec4 (v.x < u.x, v.y < u.y, v.z < u.z, v.w < u.w);
+bvec4 lessThan(const vec4 u, const vec4 v)
+{
+   __asm vec4_slt __retVal, u, v;
 }
 
-bvec2 lessThan (ivec2 v, ivec2 u) {
-    return bvec2 (v.x < u.x, v.y < u.y);
+bvec2 lessThan(const ivec2 u, const ivec2 v)
+{
+   __asm vec4_slt __retVal.xy, u, v;
 }
 
-bvec3 lessThan (ivec3 v, ivec3 u) {
-    return bvec3 (v.x < u.x, v.y < u.y, v.z < u.z);
+bvec3 lessThan(const ivec3 u, const ivec3 v)
+{
+   __asm vec4_slt __retVal.xyz, u, v;
 }
 
-bvec4 lessThan (ivec4 v, ivec4 u) {
-    return bvec4 (v.x < u.x, v.y < u.y, v.z < u.z, v.w < u.w);
+bvec4 lessThan(const ivec4 u, const ivec4 v)
+{
+   __asm vec4_slt __retVal, u, v;
 }
 
-bvec2 lessThanEqual (vec2 v, vec2 u) {
-    return bvec2 (v.x <= u.x, v.y <= u.y);
+
+//// lessThanEqual: component-wise (first argument <= second argument)
+
+bvec2 lessThanEqual(const vec2 u, const vec2 v)
+{
+   __asm vec4_sle __retVal.xy, u, v;
 }
 
-bvec3 lessThanEqual (vec3 v, vec3 u) {
-    return bvec3 (v.x <= u.x, v.y <= u.y, v.z <= u.z);
+bvec3 lessThanEqual(const vec3 u, const vec3 v)
+{
+   __asm vec4_sle __retVal.xyz, u, v;
 }
 
-bvec4 lessThanEqual (vec4 v, vec4 u) {
-    return bvec4 (v.x <= u.x, v.y <= u.y, v.z <= u.z, v.w <= u.w);
+bvec4 lessThanEqual(const vec4 u, const vec4 v)
+{
+   __asm vec4_sle __retVal, u, v;
 }
 
-bvec2 lessThanEqual (ivec2 v, ivec2 u) {
-    return bvec2 (v.x <= u.x, v.y <= u.y);
+bvec2 lessThanEqual(const ivec2 u, const ivec2 v)
+{
+   __asm vec4_sle __retVal.xy, u, v;
 }
 
-bvec3 lessThanEqual (ivec3 v, ivec3 u) {
-    return bvec3 (v.x <= u.x, v.y <= u.y, v.z <= u.z);
+bvec3 lessThanEqual(const ivec3 u, const ivec3 v)
+{
+   __asm vec4_sle __retVal.xyz, u, v;
 }
 
-bvec4 lessThanEqual (ivec4 v, ivec4 u) {
-    return bvec4 (v.x <= u.x, v.y <= u.y, v.z <= u.z, v.w <= u.w);
+bvec4 lessThanEqual(const ivec4 u, const ivec4 v)
+{
+   __asm vec4_sle __retVal, u, v;
 }
 
-bvec2 greaterThan (vec2 v, vec2 u) {
-    return bvec2 (v.x > u.x, v.y > u.y);
+
+//// greaterThan: component-wise (first argument > second argument)
+
+bvec2 greaterThan(const vec2 u, const vec2 v)
+{
+   __asm vec4_sgt __retVal.xy, u, v;
 }
 
-bvec3 greaterThan (vec3 v, vec3 u) {
-    return bvec3 (v.x > u.x, v.y > u.y, v.z > u.z);
+bvec3 greaterThan(const vec3 u, const vec3 v)
+{
+   __asm vec4_sgt __retVal.xyz, u, v;
 }
 
-bvec4 greaterThan (vec4 v, vec4 u) {
-    return bvec4 (v.x > u.x, v.y > u.y, v.z > u.z, v.w > u.w);
+bvec4 greaterThan(const vec4 u, const vec4 v)
+{
+   __asm vec4_sgt __retVal, u, v;
 }
 
-bvec2 greaterThan (ivec2 v, ivec2 u) {
-    return bvec2 (v.x > u.x, v.y > u.y);
+bvec2 greaterThan(const ivec2 u, const ivec2 v)
+{
+   __asm vec4_sgt __retVal.xy, u, v;
 }
 
-bvec3 greaterThan (ivec3 v, ivec3 u) {
-    return bvec3 (v.x > u.x, v.y > u.y, v.z > u.z);
+bvec3 greaterThan(const ivec3 u, const ivec3 v)
+{
+   __asm vec4_sgt __retVal.xyz, u, v;
 }
 
-bvec4 greaterThan (ivec4 v, ivec4 u) {
-   return bvec4 (v.x > u.x, v.y > u.y, v.z > u.z, v.w > u.w);
+bvec4 greaterThan(const ivec4 u, const ivec4 v)
+{
+   __asm vec4_sgt __retVal, u, v;
 }
 
-bvec2 greaterThanEqual (vec2 v, vec2 u) {
-    return bvec2 (v.x >= u.x, v.y >= u.y);
+
+//// greaterThanEqual: component-wise (first argument >= second argument)
+
+bvec2 greaterThanEqual(const vec2 u, const vec2 v)
+{
+   __asm vec4_sge __retVal.xy, u, v;
 }
 
-bvec3 greaterThanEqual (vec3 v, vec3 u) {
-    return bvec3 (v.x >= u.x, v.y >= u.y, v.z >= u.z);
+bvec3 greaterThanEqual(const vec3 u, const vec3 v)
+{
+   __asm vec4_sge __retVal.xyz, u, v;
 }
 
-bvec4 greaterThanEqual (vec4 v, vec4 u) {
-    return bvec4 (v.x >= u.x, v.y >= u.y, v.z >= u.z, v.w >= u.w);
+bvec4 greaterThanEqual(const vec4 u, const vec4 v)
+{
+   __asm vec4_sge __retVal, u, v;
 }
 
-bvec2 greaterThanEqual (ivec2 v, ivec2 u) {
-    return bvec2 (v.x >= u.x, v.y >= u.y);
+bvec2 greaterThanEqual(const ivec2 u, const ivec2 v)
+{
+   __asm vec4_sge __retVal.xy, u, v;
 }
 
-bvec3 greaterThanEqual (ivec3 v, ivec3 u) {
-    return bvec3 (v.x >= u.x, v.y >= u.y, v.z >= u.z);
+bvec3 greaterThanEqual(const ivec3 u, const ivec3 v)
+{
+   __asm vec4_sge __retVal.xyz, u, v;
 }
 
-bvec4 greaterThanEqual (ivec4 v, ivec4 u) {
-    return bvec4 (v.x >= u.x, v.y >= u.y, v.z >= u.z, v.w >= u.w);
+bvec4 greaterThanEqual(const ivec4 u, const ivec4 v)
+{
+   __asm vec4_sge __retVal, u, v;
 }
 
-bvec2 equal (vec2 v, vec2 u) {
-    return bvec2 (v.x == u.x, v.y == u.y);
+
+//// equal: component-wise (first argument == second argument)
+
+bvec2 equal(const vec2 u, const vec2 v)
+{
+   __asm vec4_seq __retVal.xy, u, v;
 }
 
-bvec3 equal (vec3 v, vec3 u) {
-    return bvec3 (v.x == u.x, v.y == u.y, v.z == u.z);
+bvec3 equal(const vec3 u, const vec3 v)
+{
+   __asm vec4_seq __retVal.xyz, u, v;
 }
 
-bvec4 equal (vec4 v, vec4 u) {
-    return bvec4 (v.x == u.x, v.y == u.y, v.z == u.z, v.w == u.w);
+bvec4 equal(const vec4 u, const vec4 v)
+{
+   __asm vec4_seq __retVal, u, v;
 }
 
-bvec2 equal (ivec2 v, ivec2 u) {
-    return bvec2 (v.x == u.x, v.y == u.y);
+bvec2 equal(const ivec2 u, const ivec2 v)
+{
+   __asm vec4_seq __retVal.xy, u, v;
 }
 
-bvec3 equal (ivec3 v, ivec3 u) {
-    return bvec3 (v.x == u.x, v.y == u.y, v.z == u.z);
+bvec3 equal(const ivec3 u, const ivec3 v)
+{
+   __asm vec4_seq __retVal.xyz, u, v;
 }
 
-bvec4 equal (ivec4 v, ivec4 u) {
-    return bvec4 (v.x == u.x, v.y == u.y, v.z == u.z, v.w == u.w);
+bvec4 equal(const ivec4 u, const ivec4 v)
+{
+   __asm vec4_seq __retVal, u, v;
 }
 
-bvec2 notEqual (vec2 v, vec2 u) {
-    return bvec2 (v.x != u.x, v.y != u.y);
+
+//// notEqual: component-wise (first argument != second argument)
+
+bvec2 notEqual(const vec2 u, const vec2 v)
+{
+   __asm vec4_sne __retVal.xy, u, v;
 }
 
-bvec3 notEqual (vec3 v, vec3 u) {
-    return bvec3 (v.x != u.x, v.y != u.y, v.z != u.z);
+bvec3 notEqual(const vec3 u, const vec3 v)
+{
+   __asm vec4_sne __retVal.xyz, u, v;
 }
 
-bvec4 notEqual (vec4 v, vec4 u) {
-    return bvec4 (v.x != u.x, v.y != u.y, v.z != u.z, v.w != u.w);
+bvec4 notEqual(const vec4 u, const vec4 v)
+{
+   __asm vec4_sne __retVal, u, v;
 }
 
-bvec2 notEqual (ivec2 v, ivec2 u) {
-    return bvec2 (v.x != u.x, v.y != u.y);
+bvec2 notEqual(const ivec2 u, const ivec2 v)
+{
+   __asm vec4_sne __retVal.xy, u, v;
 }
 
-bvec3 notEqual (ivec3 v, ivec3 u) {
-    return bvec3 (v.x != u.x, v.y != u.y, v.z != u.z);
+bvec3 notEqual(const ivec3 u, const ivec3 v)
+{
+   __asm vec4_sne __retVal.xyz, u, v;
 }
 
-bvec4 notEqual (ivec4 v, ivec4 u) {
-    return bvec4 (v.x != u.x, v.y != u.y, v.z != u.z, v.w != u.w);
+bvec4 notEqual(const ivec4 u, const ivec4 v)
+{
+   __asm vec4_sne __retVal, u, v;
 }
 
-bool any (bvec2 v) {
-    return v.x || v.y;
+
+//// any: true when the sum of the components != 0.0 (assumes 'true' is stored as a positive number -- TODO confirm)
+
+bool any(const bvec2 v)
+{
+   float sum;
+   __asm vec4_add sum.x, v.x, v.y;
+   __asm vec4_sne __retVal.x, sum.x, 0.0;
 }
 
-bool any (bvec3 v) {
-    return v.x || v.y || v.z;
+bool any(const bvec3 v)
+{
+   float sum;
+   __asm vec4_add sum.x, v.x, v.y;
+   __asm vec4_add sum.x, sum.x, v.z;
+   __asm vec4_sne __retVal.x, sum.x, 0.0;
 }
 
-bool any (bvec4 v) {
-    return v.x || v.y || v.z || v.w;
+bool any(const bvec4 v)
+{
+   float sum;
+   __asm vec4_add sum.x, v.x, v.y;
+   __asm vec4_add sum.x, sum.x, v.z;
+   __asm vec4_add sum.x, sum.x, v.w;
+   __asm vec4_sne __retVal.x, sum.x, 0.0;
 }
 
-bool all (bvec2 v) {
-    return v.x && v.y;
+
+//// all
+
+bool all (const bvec2 v)
+{
+   // every component true  <=>  product of the components is non-zero
+   float prod;
+   __asm vec4_multiply prod.x, v.x, v.y;
+   __asm vec4_sne __retVal.x, prod.x, 0.0;
 }
 
-bool all (bvec3 v) {
-    return v.x && v.y && v.z;
+bool all (const bvec3 v)
+{
+   float prod;  // running product of the components
+   __asm vec4_multiply prod.x, v.x, v.y;
+   __asm vec4_multiply prod.x, prod.x, v.z;
+   __asm vec4_sne __retVal.x, prod.x, 0.0;  // product is non-zero only when no component is zero
 }
 
-bool all (bvec4 v) {
-    return v.x && v.y && v.z && v.w;
+bool all (const bvec4 v)
+{
+   float prod;  // running product of the components
+   __asm vec4_multiply prod.x, v.x, v.y;
+   __asm vec4_multiply prod.x, prod.x, v.z;
+   __asm vec4_multiply prod.x, prod.x, v.w;
+   __asm vec4_sne __retVal.x, prod.x, 0.0;  // product is non-zero only when no component is zero
 }
 
-bvec2 not (bvec2 v) {
-    return bvec2 (!v.x, !v.y);
+
+
+//// not: component-wise logical negation, computed as (component == 0.0)
+
+bvec2 not (const bvec2 v)
+{
+   __asm vec4_seq __retVal.xy, v, 0.0;
 }
 
-bvec3 not (bvec3 v) {
-    return bvec3 (!v.x, !v.y, !v.z);
+bvec3 not (const bvec3 v)
+{
+   __asm vec4_seq __retVal.xyz, v, 0.0;
 }
 
-bvec4 not (bvec4 v) {
-    return bvec4 (!v.x, !v.y, !v.z, !v.w);
+bvec4 not (const bvec4 v)
+{
+   __asm vec4_seq __retVal, v, 0.0;
 }
 
-//
-// 8.7 Texture Lookup Functions
-//
 
-vec4 texture1D (sampler1D sampler, float coord) {
-    vec4 texel;
-    __asm vec4_tex1d texel, sampler, coord, 0.0;
-    return texel;
+
+//// Texture Lookup Functions  (for both fragment and vertex shaders)
+
+vec4 texture1D(const sampler1D sampler, const float coord)
+{
+   __asm vec4_tex1d __retVal, sampler, coord;
 }
 
-vec4 texture1DProj (sampler1D sampler, vec2 coord) {
-    return texture1D (sampler, coord.s / coord.t);
+vec4 texture1DProj(const sampler1D sampler, const vec2 coord)
+{
+   // new coord with .y moved to .w (coord4.y and coord4.z are left unset)
+   vec4 coord4;
+   coord4.x = coord.x;
+   coord4.w = coord.y;
+   __asm vec4_texp1d __retVal, sampler, coord4;
 }
 
-vec4 texture1DProj (sampler1D sampler, vec4 coord) {
-    return texture1D (sampler, coord.s / coord.q);
+vec4 texture1DProj(const sampler1D sampler, const vec4 coord)
+{
+   __asm vec4_texp1d __retVal, sampler, coord;
 }
 
-vec4 texture2D (sampler2D sampler, vec2 coord) {
-    vec4 texel;
-    __asm vec4_tex2d texel, sampler, coord, 0.0;
-    return texel;
+
+vec4 texture2D(const sampler2D sampler, const vec2 coord)
+{
+   __asm vec4_tex2d __retVal, sampler, coord;
 }
 
-vec4 texture2DProj (sampler2D sampler, vec3 coord) {
-    return texture2D (sampler, vec2 (coord.s / coord.p, coord.t / coord.p));
+vec4 texture2DProj(const sampler2D sampler, const vec3 coord)
+{
+   // new coord with .z moved to .w
+   vec4 coord4;
+   coord4.xy = coord.xy;
+   coord4.w = coord.z;
+    __asm vec4_texp2d __retVal, sampler, coord4;
 }
 
-vec4 texture2DProj (sampler2D sampler, vec4 coord) {
-    return texture2D (sampler, vec2 (coord.s / coord.q, coord.t / coord.q));
+vec4 texture2DProj(const sampler2D sampler, const vec4 coord)
+{
+   __asm vec4_texp2d __retVal, sampler, coord;
 }
 
-vec4 texture3D (sampler3D sampler, vec3 coord) {
-    vec4 texel;
-    __asm vec4_tex3d texel, sampler, coord, 0.0;
-    return texel;
+
+vec4 texture3D(const sampler3D sampler, const vec3 coord)
+{
+   __asm vec4_tex3d __retVal, sampler, coord;
 }
 
-vec4 texture3DProj (sampler3D sampler, vec4 coord) {
-    return texture3D (sampler, vec3 (coord.s / coord.q, coord.t / coord.q, coord.p / coord.q));
+vec4 texture3DProj(const sampler3D sampler, const vec4 coord)
+{
+   __asm vec4_texp3d __retVal, sampler, coord;
 }
 
-vec4 textureCube (samplerCube sampler, vec3 coord) {
-    vec4 texel;
-    __asm vec4_texcube texel, sampler, coord, 0.0;
-    return texel;
+
+vec4 textureCube(const samplerCube sampler, const vec3 coord)
+{
+   __asm vec4_texcube __retVal, sampler, coord;
 }
 
+
+
 vec4 shadow1D (sampler1DShadow sampler, vec3 coord) {
     vec4 texel;
     __asm vec4_shad1d texel, sampler, coord, 0.0;
@@ -1347,123 +1686,107 @@ vec4 shadow2DProj (sampler2DShadow sampler, vec4 coord) {
 // AUTHOR: Stefan Gustavson (stegu@itn.liu.se), Nov 26, 2005
 //
 
-float noise1 (float x) {
-    float a;
-    __asm float_noise1 a, x;
-    return a;
+float noise1(const float x)
+{
+   __asm float_noise1 __retVal, x;
 }
 
-float noise1 (vec2 x) {
-    float a;
-    __asm float_noise2 a, x;
-    return a;
-}
 
-float noise1 (vec3 x) {
-    float a;
-    __asm float_noise3 a, x;
-    return a;
+float noise1(const vec2 x)
+{
+    __asm float_noise2 __retVal, x;
 }
 
-float noise1 (vec4 x) {
-    float a;
-    __asm float_noise4 a, x;
-    return a;
+float noise1(const vec3 x)
+{
+    __asm float_noise3 __retVal, x;
 }
 
-vec2 noise2 (float x) {
-    return vec2 (
-        noise1 (x),
-        noise1 (x + 19.34)
-    );
+float noise1(const vec4 x)
+{
+    __asm float_noise4 __retVal, x;
 }
 
-vec2 noise2 (vec2 x) {
-    return vec2 (
-        noise1 (x),
-        noise1 (x + vec2 (19.34, 7.66))
-    );
+vec2 noise2(const float x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + 19.34);
 }
 
-vec2 noise2 (vec3 x) {
-    return vec2 (
-        noise1 (x),
-        noise1 (x + vec3 (19.34, 7.66, 3.23))
-    );
+vec2 noise2(const vec2 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec2(19.34, 7.66));
 }
 
-vec2 noise2 (vec4 x) {
-    return vec2 (
-        noise1 (x),
-        noise1 (x + vec4 (19.34, 7.66, 3.23, 2.77))
-    );
+vec2 noise2(const vec3 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec3(19.34, 7.66, 3.23));
 }
 
-vec3 noise3 (float x) {
-    return vec3 (
-        noise1 (x),
-        noise1 (x + 19.34),
-        noise1 (x + 5.47)
-    );
+vec2 noise2(const vec4 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec4(19.34, 7.66, 3.23, 2.77));
 }
 
-vec3 noise3 (vec2 x) {
-    return vec3 (
-        noise1 (x),
-        noise1 (x + vec2 (19.34, 7.66)),
-        noise1 (x + vec2 (5.47, 17.85))
-    );
+vec3 noise3(const float x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + 19.34);
+   __retVal.z = noise1(x + 5.47);
 }
 
-vec3 noise3 (vec3 x) {
-    return vec3 (
-        noise1 (x),
-        noise1 (x + vec3 (19.34, 7.66, 3.23)),
-        noise1 (x + vec3 (5.47, 17.85, 11.04))
-    );
+vec3 noise3(const vec2 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec2(19.34, 7.66));
+   __retVal.z = noise1(x + vec2(5.47, 17.85));
 }
 
-vec3 noise3 (vec4 x) {
-    return vec3 (
-        noise1 (x),
-        noise1 (x + vec4 (19.34, 7.66, 3.23, 2.77)),
-        noise1 (x + vec4 (5.47, 17.85, 11.04, 13.19))
-    );
+vec3 noise3(const vec3 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec3(19.34, 7.66, 3.23));
+   __retVal.z = noise1(x + vec3(5.47, 17.85, 11.04));
 }
 
-vec4 noise4 (float x) {
-    return vec4 (
-        noise1 (x),
-        noise1 (x + 19.34),
-        noise1 (x + 5.47),
-        noise1 (x + 23.54)
-    );
+vec3 noise3(const vec4 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec4(19.34, 7.66, 3.23, 2.77));
+   __retVal.z = noise1(x + vec4(5.47, 17.85, 11.04, 13.19));
 }
 
-vec4 noise4 (vec2 x) {
-    return vec4 (
-        noise1 (x),
-        noise1 (x + vec2 (19.34, 7.66)),
-        noise1 (x + vec2 (5.47, 17.85)),
-        noise1 (x + vec2 (23.54, 29.11))
-    );
+vec4 noise4(const float x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + 19.34);
+   __retVal.z = noise1(x + 5.47);
+   __retVal.w = noise1(x + 23.54);
 }
 
-vec4 noise4 (vec3 x) {
-    return vec4 (
-        noise1 (x),
-        noise1 (x + vec3 (19.34, 7.66, 3.23)),
-        noise1 (x + vec3 (5.47, 17.85, 11.04)),
-        noise1 (x + vec3 (23.54, 29.11, 31.91))
-    );
+vec4 noise4(const vec2 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec2 (19.34, 7.66));
+   __retVal.z = noise1(x + vec2 (5.47, 17.85));
+   __retVal.w = noise1(x + vec2 (23.54, 29.11));
 }
 
-vec4 noise4 (vec4 x) {
-    return vec4 (
-        noise1 (x),
-        noise1 (x + vec4 (19.34, 7.66, 3.23, 2.77)),
-        noise1 (x + vec4 (5.47, 17.85, 11.04, 13.19)),
-        noise1 (x + vec4 (23.54, 29.11, 31.91, 37.48))
-    );
+vec4 noise4(const vec3 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec3(19.34, 7.66, 3.23));
+   __retVal.z = noise1(x + vec3(5.47, 17.85, 11.04));
+   __retVal.w = noise1(x + vec3(23.54, 29.11, 31.91));
 }
 
+vec4 noise4(const vec4 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec4(19.34, 7.66, 3.23, 2.77));
+   __retVal.z = noise1(x + vec4(5.47, 17.85, 11.04, 13.19));
+   __retVal.w = noise1(x + vec4(23.54, 29.11, 31.91, 37.48));
+}