1 files changed, 1859 insertions, 892 deletions
diff --git a/src/mesa/shader/slang/library/slang_core.gc b/src/mesa/shader/slang/library/slang_core.gc
index 7a721a5a1d..927ca048d7 100755..100644
--- a/src/mesa/shader/slang/library/slang_core.gc
+++ b/src/mesa/shader/slang/library/slang_core.gc
@@ -23,1493 +23,2460 @@
  */
 
 //
-// This file defines nearly all constructors and operators for built-in data types, using
-// extended language syntax. In general, compiler treats constructors and operators as
-// ordinary functions with some exceptions. For example, the language does not allow
-// functions to be called in constant expressions - here the exception is made to allow it.
+// This file defines nearly all constructors and operators for built-in data
+// types, using extended language syntax. In general, the compiler treats
+// constructors and operators as ordinary functions with some exceptions.
+// For example, the language does not allow functions to be called in
+// constant expressions - here the exception is made to allow it.
 //
-// Each implementation provides its own version of this file. Each implementation can define
-// the required set of operators and constructors in its own fashion.
+// Each implementation provides its own version of this file. Each
+// implementation can define the required set of operators and constructors
+// in its own fashion.
 //
-// The extended language syntax is only present when compiling this file. It is implicitly
-// included at the very beginning of the compiled shader, so no built-in functions can be
-// used.
+// The extended language syntax is only present when compiling this file.
+// It is implicitly included at the very beginning of the compiled shader,
+// so no built-in functions can be used.
 //
-// To communicate with the implementation, a special extended "__asm" keyword is used, followed
-// by an instruction name (any valid identifier), a destination variable identifier and a
-// a list of zero or more source variable identifiers. A variable identifier is a variable name
-// declared earlier in the code (as a function parameter, local or global variable).
-// An instruction name designates an instruction that must be exported by the implementation.
-// Each instruction receives data from source variable identifiers and returns data in the
-// destination variable identifier.
+// To communicate with the implementation, a special extended "__asm" keyword
+// is used, followed by an instruction name (any valid identifier), a
+// destination variable identifier and a list of zero or more source
+// variable identifiers.
 //
-// It is up to the implementation how to define a particular operator or constructor. If it is
-// expected to being used rarely, it can be defined in terms of other operators and constructors,
+// A variable identifier is a variable name declared earlier in the code
+// (as a function parameter, local or global variable).
+//
+// An instruction name designates an instruction that must be exported
+// by the implementation.  Each instruction receives data from source
+// variable identifiers and returns data in the destination variable
+// identifier.
+//
+// It is up to the implementation how to define a particular operator
+// or constructor. If it is expected to be used rarely, it can be
+// defined in terms of other operators and constructors,
 // for example:
 //
 // ivec2 __operator + (const ivec2 x, const ivec2 y) {
 //    return ivec2 (x[0] + y[0], x[1] + y[1]);
 // }
 //
-// If a particular operator or constructor is expected to be used very often or is an atomic
-// operation (that is, an operation that cannot be expressed in terms of other operations or
-// would create a dependency cycle) it must be defined using one or more __asm constructs.
+// If a particular operator or constructor is expected to be used very
+// often or is an atomic operation (that is, an operation that cannot be
+// expressed in terms of other operations or would create a dependency
+// cycle) it must be defined using one or more __asm constructs.
 //
-// Each implementation must define constructors for all scalar types (bool, float, int).
-// There are 9 scalar-to-scalar constructors (including identity constructors). However,
-// since the language introduces special constructors (like matrix constructor with a single
+// Each implementation must define constructors for all scalar types
+// (bool, float, int).  There are 9 scalar-to-scalar constructors
+// (including identity constructors). However, since the language
+// introduces special constructors (like matrix constructor with a single
 // scalar value), implementations must also implement these cases.
 // The compiler provides the following algorithm when resolving a constructor:
 // - try to find a constructor with a prototype matching ours,
-// - if no constructor is found and this is a scalar-to-scalar constructor, raise an error,
+// - if no constructor is found and this is a scalar-to-scalar constructor,
+//   raise an error,
 // - if a constructor is found, execute it and return,
-// - count the size of the constructor parameter list - if it is less than the size of
-//   our constructor's type, raise an error,
-// - for each parameter in the list do a recursive constructor matching for appropriate
-//   scalar fields in the constructed variable,
+// - count the size of the constructor parameter list - if it is less than
+//   the size of our constructor's type, raise an error,
+// - for each parameter in the list do a recursive constructor matching for
+//   appropriate scalar fields in the constructed variable,
 //
-// Each implementation must also define a set of operators that deal with built-in data types.
+// Each implementation must also define a set of operators that deal with
+// built-in data types.
 // There are four kinds of operators:
-// 1) Operators that are implemented only by the compiler: "()" (function call), "," (sequence)
-//    and "?:" (selection).
-// 2) Operators that are implemented by the compiler by expressing it in terms of other operators:
+// 1) Operators that are implemented only by the compiler: "()" (function
+//    call), "," (sequence) and "?:" (selection).
+// 2) Operators that are implemented by the compiler by expressing it in
+//    terms of other operators:
 //    - "." (field selection) - translated to subscript access,
-//    - "&&" (logical and) - translated to "<left_expr> ? <right_expr> : false",
+//    - "&&" (logical and) - translated to "<left_expr> ? <right_expr> :
+//      false",
 //    - "||" (logical or) - translated to "<left_expr> ? true : <right_expr>",
-// 3) Operators that can be defined by the implementation and if the required prototype is not
-//    found, standard behaviour is used:
-//    - "==", "!=", "=" (equality, assignment) - compare or assign matching fields one-by-one;
-//      note that at least operators for scalar data types must be defined by the implementation
-//      to get it work,
-// 4) All other operators not mentioned above. If no required prototype is found, an error is
-//    raised. An implementation must follow the language specification to provide all valid
-//    operator prototypes.
+// 3) Operators that can be defined by the implementation and if the required
+//    prototype is not found, standard behaviour is used:
+//    - "==", "!=", "=" (equality, assignment) - compare or assign
+//      matching fields one-by-one;
+//      note that at least operators for scalar data types must be defined
+//      by the implementation to get it to work,
+// 4) All other operators not mentioned above. If no required prototype is
+//    found, an error is raised. An implementation must follow the language
+//    specification to provide all valid operator prototypes.
 //
 
-int __constructor (const float f) {
-    int i;
-    __asm float_to_int i, f;
-    return i;
+
+
+//// Basic, scalar constructors/casts
+
+int __constructor(const float f)
+{
+   __asm float_to_int __retVal, f;
 }
 
-bool __constructor (const int i) {
-    return i != 0;
+bool __constructor(const int i)  // bool(int): result comes from comparing i with 0.0
+{
+   const float zero = 0.0;
+   __asm vec4_seq __retVal, i, zero;  // NOTE(review): "seq" looks like set-on-EQUAL; the replaced code returned i != 0 -- confirm
 }
 
-bool __constructor (const float f) {
-    return f != 0.0;
+bool __constructor(const float f)  // bool(float): result comes from comparing f with 0.0
+{
+   const float zero = 0.0;
+   __asm vec4_seq __retVal, f, zero;  // operand is the parameter f; NOTE(review): "seq" looks like set-on-EQUAL, replaced code returned f != 0.0 -- confirm
 }
 
-int __constructor (const bool b) {
-    return b ? 1 : 0;
+int __constructor(const bool b)
+{
+   __retVal = b;
 }
 
-float __constructor (const bool b) {
-    return b ? 1.0 : 0.0;
+float __constructor(const bool b)
+{
+   __retVal = b;
 }
 
-float __constructor (const int i) {
-    float f;
-    __asm int_to_float f, i;
-    return f;
+float __constructor(const int i)
+{
+    __asm int_to_float __retVal, i;
 }
 
-bool __constructor (const bool b) {
-    return b;
+bool __constructor(const bool b)
+{
+   __retVal = b;
 }
 
-int __constructor (const int i) {
-    return i;
+int __constructor(const int i)
+{
+   __retVal = i;
 }
 
-float __constructor (const float f) {
-    return f;
+float __constructor(const float f)
+{
+   __retVal = f;
 }
 
-vec2 __constructor (const float f) {
-    return vec2 (f, f);
+
+//// vec2 constructors
+
+vec2 __constructor(const float x, const float y)
+{
+   __retVal.x = x;
+   __retVal.y = y;
 }
 
-vec2 __constructor (const int i) {
-    float x;
-    __asm int_to_float x, i;
-    return vec2 (x);
+vec2 __constructor(const float f)
+{
+   __retVal.xy = f.xx;
 }
 
-vec2 __constructor (const bool b) {
-    return vec2 (b ? 1.0 : 0.0);
+vec2 __constructor(const int i)
+{
+   __retVal.xy = i.xx;
 }
 
-vec3 __constructor (const float f) {
-    return vec3 (f, f, f);
+vec2 __constructor(const bool b)
+{
+   __retVal.xy = b.xx;
 }
 
-vec3 __constructor (const int i) {
-    float x;
-    __asm int_to_float x, i;
-    return vec3 (x);
+vec2 __constructor(const vec3 v)
+{
+   __retVal.xy = v.xy;
 }
 
-vec3 __constructor (const bool b) {
-    return vec3 (b ? 1.0 : 0.0);
+
+//// vec3 constructors
+
+vec3 __constructor(const float x, const float y, const float z)
+{
+   __retVal.x = x;
+   __retVal.y = y;
+   __retVal.z = z;
 }
 
-vec4 __constructor (const float f) {
-    return vec4 (f, f, f, f);
+vec3 __constructor(const float f)
+{
+   __retVal.xyz = f.xxx;
 }
 
-vec4 __constructor (const int i) {
-    float x;
-    __asm int_to_float x, i;
-    return vec4 (x);
+vec3 __constructor(const int i)
+{
+   __asm int_to_float __retVal.xyz, i.xxx;
 }
 
-vec4 __constructor (const bool b) {
-    return vec4 (b ? 1.0 : 0.0);
+vec3 __constructor(const bool b)
+{
+   __retVal.xyz = b.xxx;
 }
 
-ivec2 __constructor (const int i) {
-    return ivec2 (i, i);
+vec3 __constructor(const vec4 v)
+{
+   __retVal.xyz = v.xyz;
 }
 
-ivec2 __constructor (const float f) {
-    return ivec2 (int (f));
+
+//// vec4 constructors
+
+vec4 __constructor(const float x, const float y, const float z, const float w)
+{
+   __retVal.x = x;
+   __retVal.y = y;
+   __retVal.z = z;
+   __retVal.w = w;
 }
 
-ivec2 __constructor (const bool b) {
-    return ivec2 (int (b));
+vec4 __constructor(const float f)
+{
+   __retVal = f.xxxx;
 }
 
-ivec3 __constructor (const int i) {
-    return ivec3 (i, i, i);
+vec4 __constructor(const int i)
+{
+   __retVal = i.xxxx;
 }
 
-ivec3 __constructor (const float f) {
-    return ivec3 (int (f));
+vec4 __constructor(const bool b)
+{
+   __retVal = b.xxxx;
 }
 
-ivec3 __constructor (const bool b) {
-    return ivec3 (int (b));
+vec4 __constructor(const vec3 v3, const float f)
+{
+   // XXX this constructor shouldn't be needed anymore
+   __retVal.xyz = v3;
+   __retVal.w = f;
 }
 
-ivec4 __constructor (const int i) {
-    return ivec4 (i, i, i, i);
+
+//// ivec2 constructors
+
+ivec2 __constructor(const int i, const int j)
+{
+   __retVal.x = i;
+   __retVal.y = j;
 }
 
-ivec4 __constructor (const float f) {
-    return ivec4 (int (f));
+ivec2 __constructor(const int i)
+{
+   __retVal.xy = i.xx;
 }
 
-ivec4 __constructor (const bool b) {
-    return ivec4 (int (b));
+ivec2 __constructor(const float f)
+{
+   __asm float_to_int __retVal.xy, f.xx;
 }
 
-bvec2 __constructor (const bool b) {
-    return bvec2 (b, b);
+ivec2 __constructor(const bool b)
+{
+   __asm float_to_int __retVal.xy, b.xx;
 }
 
-bvec2 __constructor (const float f) {
-    return bvec2 (bool (f));
+
+//// ivec3 constructors
+
+ivec3 __constructor(const int i, const int j, const int k)
+{
+   __retVal.x = i;
+   __retVal.y = j;
+   __retVal.z = k;
 }
 
-bvec2 __constructor (const int i) {
-    return bvec2 (bool (i));
+ivec3 __constructor(const int i)
+{
+   __retVal.xyz = i.xxx;
 }
 
-bvec3 __constructor (const bool b) {
-    return bvec3 (b, b, b);
+ivec3 __constructor(const float f)  // ivec3(float): replicate f into x, y and z
+{
+   __asm float_to_int __retVal.xyz, f.xxx;  // convert to int, as the ivec2(float) and ivec4(float) constructors do
 }
 
-bvec3 __constructor (const float f) {
-    return bvec3 (bool (f));
+ivec3 __constructor(const bool b)
+{
+   __retVal.xyz = b.xxx;
 }
 
-bvec3 __constructor (const int i) {
-    return bvec3 (bool (i));
+
+//// ivec4 constructors
+
+ivec4 __constructor(const int x, const int y, const int z, const int w)
+{
+   __retVal.x = x;
+   __retVal.y = y;
+   __retVal.z = z;
+   __retVal.w = w;
 }
 
-bvec4 __constructor (const bool b) {
-    return bvec4 (b, b, b, b);
+ivec4 __constructor(const int i)
+{
+   __retVal = i.xxxx;
 }
 
-bvec4 __constructor (const float f) {
-    return bvec4 (bool (f));
+ivec4 __constructor(const float f)
+{
+   __asm float_to_int __retVal, f.xxxx;
 }
 
-bvec4 __constructor (const int i) {
-    return bvec4 (bool (i));
+ivec4 __constructor(const bool b)
+{
+   __retVal = b.xxxx;
 }
 
-mat2 __constructor (const float f) {
-    return mat2 (f, 0.0, 0.0, f);
+
+//// bvec2 constructors
+
+bvec2 __constructor(const bool b1, const bool b2)
+{
+   __retVal.x = b1;
+   __retVal.y = b2;
 }
 
-mat2 __constructor (const int i) {
-    float x;
-    __asm int_to_float x, i;
-    return mat2 (x);
+bvec2 __constructor(const bool b)
+{
+   __retVal.xy = b.xx;
 }
 
-mat2 __constructor (const bool b) {
-    return mat2 (b ? 1.0 : 0.0);
+bvec2 __constructor(const float f)
+{
+   const vec2 zero = vec2(0.0, 0.0);
+   __asm vec4_seq __retVal.xy, f.xx, zero;
 }
 
-mat3 __constructor (const float f) {
-    return mat3 (f, 0.0, 0.0, 0.0, f, 0.0, 0.0, 0.0, f);
+bvec2 __constructor(const int i)
+{
+   const ivec2 zero = ivec2(0, 0);
+   __asm vec4_seq __retVal.xy, i.xx, zero;
 }
 
-mat3 __constructor (const int i) {
-    float x;
-    __asm int_to_float x, i;
-    return mat3 (x);
+
+//// bvec3 constructors
+
+bvec3 __constructor(const bool b1, const bool b2, const bool b3)
+{
+   __retVal.x = b1;
+   __retVal.y = b2;
+   __retVal.z = b3;
 }
 
-mat3 __constructor (const bool b) {
-    return mat3 (b ? 1.0 : 0.0);
+bvec3 __constructor(const bool b)
+{
+   __retVal.xyz = b.xxx;
 }
 
-mat4 __constructor (const float f) {
-    return mat4 (f, 0.0, 0.0, 0.0, 0.0, f, 0.0, 0.0, 0.0, 0.0, f, 0.0, 0.0, 0.0, 0.0, f);
+bvec3 __constructor(const float f)
+{
+   const vec3 zero = vec3(0.0, 0.0, 0.0);
+   __asm vec4_seq __retVal.xyz, f.xxx, zero;
 }
 
-mat4 __constructor (const int i) {
-    float x;
-    __asm int_to_float x, i;
-    return mat4 (x);
+bvec3 __constructor(const int i)
+{
+   const ivec3 zero = ivec3(0, 0, 0);
+   __asm vec4_seq __retVal.xyz, i.xxx, zero;
 }
 
-mat4 __constructor (const bool b) {
-    return mat4 (b ? 1.0 : 0.0);
+
+//// bvec4 constructors
+
+bvec4 __constructor(const bool b1, const bool b2, const bool b3, const bool b4)
+{
+   __retVal.x = b1;
+   __retVal.y = b2;
+   __retVal.z = b3;
+   __retVal.w = b4;
 }
 
-void __operator += (inout float a, const float b) {
-    __asm float_add a, a, b;
+bvec4 __constructor(const bool b)
+{
+   __retVal.xyzw = b.xxxx;
 }
 
-float __operator - (const float a) {
-    float b;
-    __asm float_negate b, a;
-    return b;
+bvec4 __constructor(const float f)
+{
+   const vec4 zero = vec4(0.0, 0.0, 0.0, 0.0);
+   __asm vec4_seq __retVal, f.xxxx, zero;
 }
 
-void __operator -= (inout float a, const float b) {
-    float c;
-    __asm float_negate c, b;
-    __asm float_add a, a, c;
+bvec4 __constructor(const int i)
+{
+   const ivec4 zero = ivec4(0, 0, 0, 0);
+   __asm vec4_seq __retVal, i.xxxx, zero;
 }
 
-void __operator *= (inout float a, const float b) {
-    __asm float_multiply a, a, b;
+
+
+//// mat2 constructors
+
+mat2 __constructor(const float m00, const float m10,
+                   const float m01, const float m11)
+{
+   __retVal[0].x = m00;
+   __retVal[0].y = m10;
+   __retVal[1].x = m01;
+   __retVal[1].y = m11;
 }
 
-void __operator /= (inout float a, const float b) {
-    __asm float_divide a, a, b;
+mat2 __constructor(const float f)
+{
+   __retVal[0].x = f;
+   __retVal[0].y = 0.0;
+   __retVal[1].x = 0.0;
+   __retVal[1].y = f;
 }
 
-float __operator + (const float a, const float b) {
-    float c;
-    __asm float_add c, a, b;
-    return c;
+mat2 __constructor(const int i)
+{
+   return mat2(float(i));
 }
 
-void __operator += (inout int a, const int b) {
-    a = int (float (a) + float (b));
+mat2 __constructor(const bool b)
+{
+   return mat2(float(b));
 }
 
-int __operator - (const int a) {
-    float x;
-    int b;
-    __asm int_to_float x, a;
-    __asm float_negate x, x;
-    __asm float_to_int b, x;
-    return b;
+mat2 __constructor(const vec2 c0, const vec2 c1)
+{
+   __retVal[0] = c0;
+   __retVal[1] = c1;
 }
 
-void __operator -= (inout int a, const int b) {
-    a += -b;
+
+//// mat3 constructors
+
+mat3 __constructor(const float m00, const float m10, const float m20,
+                   const float m01, const float m11, const float m21,
+                   const float m02, const float m12, const float m22)
+{
+   __retVal[0].x = m00;
+   __retVal[0].y = m10;
+   __retVal[0].z = m20;
+   __retVal[1].x = m01;
+   __retVal[1].y = m11;
+   __retVal[1].z = m21;
+   __retVal[2].x = m02;
+   __retVal[2].y = m12;
+   __retVal[2].z = m22;
 }
 
-float __operator * (const float a, const float b) {
-    float c;
-    __asm float_multiply c, a, b;
-    return c;
+mat3 __constructor(const float f)
+{
+   vec2 v = vec2(f, 0.0);
+   __retVal[0] = v.xyy;
+   __retVal[1] = v.yxy;
+   __retVal[2] = v.yyx;
 }
 
-void __operator *= (inout int a, const int b) {
-    a = int (float (a) * float (b));
+mat3 __constructor(const int i)
+{
+   return mat3(float(i));
 }
 
-float __operator / (const float a, const float b) {
-    float c;
-    __asm float_divide c, a, b;
-    return c;
+mat3 __constructor(const bool b)
+{
+   return mat3(float(b));
 }
 
-void __operator /= (inout int a, const int b) {
-    a = int (float (a) / float (b));
+mat3 __constructor(const vec3 c0, const vec3 c1, const vec3 c2)
+{
+   __retVal[0] = c0;
+   __retVal[1] = c1;
+   __retVal[2] = c2;
 }
 
-void __operator += (inout vec2 v, const vec2 u) {
-    v.x += u.x;
-    v.y += u.y;
+
+//// mat4 constructors
+
+mat4 __constructor(const float m00, const float m10, const float m20, const float m30,
+                   const float m01, const float m11, const float m21, const float m31,
+                   const float m02, const float m12, const float m22, const float m32,
+                   const float m03, const float m13, const float m23, const float m33)
+{
+   __retVal[0].x = m00;
+   __retVal[0].y = m10;
+   __retVal[0].z = m20;
+   __retVal[0].w = m30;
+   __retVal[1].x = m01;
+   __retVal[1].y = m11;
+   __retVal[1].z = m21;
+   __retVal[1].w = m31;
+   __retVal[2].x = m02;
+   __retVal[2].y = m12;
+   __retVal[2].z = m22;
+   __retVal[2].w = m32;
+   __retVal[3].x = m03;
+   __retVal[3].y = m13;
+   __retVal[3].z = m23;
+   __retVal[3].w = m33;
 }
 
-void __operator -= (inout vec2 v, const vec2 u) {
-    v.x -= u.x;
-    v.y -= u.y;
+
+mat4 __constructor(const float f)
+{
+   vec2 v = vec2(f, 0.0);
+   __retVal[0] = v.xyyy;
+   __retVal[1] = v.yxyy;
+   __retVal[2] = v.yyxy;
+   __retVal[3] = v.yyyx;
 }
 
-void __operator *= (inout vec2 v, const vec2 u) {
-    v.x *= u.x;
-    v.y *= u.y;
+mat4 __constructor(const int i)
+{
+   return mat4(float(i));
 }
 
-void __operator /= (inout vec2 v, const vec2 u) {
-    v.x /= u.x;
-    v.y /= u.y;
+mat4 __constructor(const bool b)
+{
+   return mat4(float(b));
 }
 
-void __operator += (inout vec3 v, const vec3 u) {
-    v.x += u.x;
-    v.y += u.y;
-    v.z += u.z;
+mat4 __constructor(const vec4 c0, const vec4 c1, const vec4 c2, const vec4 c3)
+{
+   __retVal[0] = c0;
+   __retVal[1] = c1;
+   __retVal[2] = c2;
+   __retVal[3] = c3;
 }
 
-void __operator -= (inout vec3 v, const vec3 u) {
-    v.x -= u.x;
-    v.y -= u.y;
-    v.z -= u.z;
+
+
+//// Basic int operators
+
+int __operator + (const int a, const int b)
+{
+// XXX For now, mostly treat ints as floats.
+// XXX If we ever have int registers, we'll do something like this:
+//    float x, y;
+//    __asm int_to_float x, a;
+//    __asm int_to_float y, b;
+//    __asm vec4_add x.x, x.x, y.x;
+//    __asm float_to_int __retVal, x;
+   float x;
+   __asm vec4_add x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator *= (inout vec3 v, const vec3 u) {
-    v.x *= u.x;
-    v.y *= u.y;
-    v.z *= u.z;
+int __operator - (const int a, const int b)
+{
+   float x;
+   __asm vec4_subtract x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator /= (inout vec3 v, const vec3 u) {
-    v.x /= u.x;
-    v.y /= u.y;
-    v.z /= u.z;
+int __operator * (const int a, const int b)
+{
+   float x;
+   __asm vec4_multiply x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator += (inout vec4 v, const vec4 u) {
-    v.x += u.x;
-    v.y += u.y;
-    v.z += u.z;
-    v.w += u.w;
+int __operator / (const int a, const int b)
+{
+   float bInv, x;
+   __asm float_rcp bInv, b;
+   __asm vec4_multiply x, a, bInv;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator -= (inout vec4 v, const vec4 u) {
-    v.x -= u.x;
-    v.y -= u.y;
-    v.z -= u.z;
-    v.w -= u.w;
+
+//// Basic ivec2 operators
+
+ivec2 __operator + (const ivec2 a, const ivec2 b)
+{
+   vec2 x;
+   __asm vec4_add x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator *= (inout vec4 v, const vec4 u) {
-    v.x *= u.x;
-    v.y *= u.y;
-    v.z *= u.z;
-    v.w *= u.w;
+ivec2 __operator - (const ivec2 a, const ivec2 b)
+{
+   vec2 x;
+   __asm vec4_subtract x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator /= (inout vec4 v, const vec4 u) {
-    v.x /= u.x;
-    v.y /= u.y;
-    v.z /= u.z;
-    v.w /= u.w;
+ivec2 __operator * (const ivec2 a, const ivec2 b)
+{
+   vec2 x;
+   __asm vec4_multiply x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator += (inout ivec2 v, const ivec2 u) {
-    v.x += u.x;
-    v.y += u.y;
+ivec2 __operator / (const ivec2 a, const ivec2 b)
+{
+   vec2 bInv, x;
+   __asm float_rcp bInv.x, b.x;
+   __asm float_rcp bInv.y, b.y;
+   __asm vec4_multiply x, a, bInv;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator -= (inout ivec2 v, const ivec2 u) {
-    v.x -= u.x;
-    v.y -= u.y;
+
+//// Basic ivec3 operators
+
+ivec3 __operator + (const ivec3 a, const ivec3 b)
+{
+   vec3 x;
+   __asm vec4_add x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator *= (inout ivec2 v, const ivec2 u) {
-    v.x *= u.x;
-    v.y *= u.y;
+ivec3 __operator - (const ivec3 a, const ivec3 b)
+{
+   vec3 x;
+   __asm vec4_subtract x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator /= (inout ivec2 v, const ivec2 u) {
-    v.x /= u.x;
-    v.y /= u.y;
+ivec3 __operator * (const ivec3 a, const ivec3 b)
+{
+   vec3 x;
+   __asm vec4_multiply x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator += (inout ivec3 v, const ivec3 u) {
-    v.x += u.x;
-    v.y += u.y;
-    v.z += u.z;
+ivec3 __operator / (const ivec3 a, const ivec3 b)
+{
+   vec3 bInv, x;
+   __asm float_rcp bInv.x, b.x;
+   __asm float_rcp bInv.y, b.y;
+   __asm float_rcp bInv.z, b.z;
+   __asm vec4_multiply x, a, bInv;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator -= (inout ivec3 v, const ivec3 u) {
-    v.x -= u.x;
-    v.y -= u.y;
-    v.z -= u.z;
+
+//// Basic ivec4 operators
+
+ivec4 __operator + (const ivec4 a, const ivec4 b)  // component-wise ivec4 addition
+{
+   vec4 x;  // four-component temporary so the w sum is kept, matching the other ivec4 operators
+   __asm vec4_add x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator *= (inout ivec3 v, const ivec3 u) {
-    v.x *= u.x;
-    v.y *= u.y;
-    v.z *= u.z;
+ivec4 __operator - (const ivec4 a, const ivec4 b)
+{
+   vec4 x;
+   __asm vec4_subtract x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator /= (inout ivec3 v, const ivec3 u) {
-    v.x /= u.x;
-    v.y /= u.y;
-    v.z /= u.z;
+ivec4 __operator * (const ivec4 a, const ivec4 b)
+{
+   vec4 x;
+   __asm vec4_multiply x, a, b;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator += (inout ivec4 v, const ivec4 u) {
-    v.x += u.x;
-    v.y += u.y;
-    v.z += u.z;
-    v.w += u.w;
+ivec4 __operator / (const ivec4 a, const ivec4 b)
+{
+   vec4 bInv, x;
+   __asm float_rcp bInv.x, b.x;
+   __asm float_rcp bInv.y, b.y;
+   __asm float_rcp bInv.z, b.z;
+   __asm float_rcp bInv.w, b.w;
+   __asm vec4_multiply x, a, bInv;
+   __asm float_to_int __retVal, x;
 }
 
-void __operator -= (inout ivec4 v, const ivec4 u) {
-    v.x -= u.x;
-    v.y -= u.y;
-    v.z -= u.z;
-    v.w -= u.w;
+
+//// Basic float operators
+
+float __operator + (const float a, const float b)
+{
+   __asm vec4_add __retVal.x, a, b;
 }
 
-void __operator *= (inout ivec4 v, const ivec4 u) {
-    v.x *= u.x;
-    v.y *= u.y;
-    v.z *= u.z;
-    v.w *= u.w;
+float __operator - (const float a, const float b)
+{
+   __asm vec4_subtract __retVal.x, a, b;
 }
 
-void __operator /= (inout ivec4 v, const ivec4 u) {
-    v.x /= u.x;
-    v.y /= u.y;
-    v.z /= u.z;
-    v.w /= u.w;
+float __operator * (const float a, const float b)
+{
+    __asm vec4_multiply __retVal.x, a, b;
 }
 
-void __operator += (inout mat2 m, const mat2 n) {
-    m[0] += n[0];
-    m[1] += n[1];
+float __operator / (const float a, const float b)
+{
+   float bInv;
+   __asm float_rcp bInv.x, b.x;
+   __asm vec4_multiply __retVal.x, a, bInv;
 }
 
-void __operator -= (inout mat2 m, const mat2 n) {
-    m[0] -= n[0];
-    m[1] -= n[1];
+
+//// Basic vec2 operators
+
+vec2 __operator + (const vec2 v, const vec2 u)
+{
+   __asm vec4_add __retVal.xy, v, u;
 }
 
-vec2 __operator * (const mat2 m, const vec2 v) {
-    return vec2 (
-        v.x * m[0].x + v.y * m[1].x,
-        v.x * m[0].y + v.y * m[1].y
-    );
+vec2 __operator - (const vec2 v, const vec2 u)
+{
+    __asm vec4_subtract __retVal.xy, v, u;
 }
 
-mat2 __operator * (const mat2 m, const mat2 n) {
-    return mat2 (m * n[0], m * n[1]);
+vec2 __operator * (const vec2 v, const vec2 u)
+{
+    __asm vec4_multiply __retVal.xy, v, u;
 }
 
-void __operator *= (inout mat2 m, const mat2 n) {
-    m = m * n;
+vec2 __operator / (const vec2 v, const vec2 u)
+{
+   vec2 w; // = 1 / u
+   __asm float_rcp w.x, u.x;
+   __asm float_rcp w.y, u.y;
+   __asm vec4_multiply __retVal.xy, v, w;
 }
 
-void __operator /= (inout mat2 m, const mat2 n) {
-    m[0] /= n[0];
-    m[1] /= n[1];
+
+//// Basic vec3 operators
+
+vec3 __operator + (const vec3 v, const vec3 u)
+{
+   __asm vec4_add __retVal.xyz, v, u;
 }
 
-void __operator += (inout mat3 m, const mat3 n) {
-    m[0] += n[0];
-    m[1] += n[1];
-    m[2] += n[2];
+vec3 __operator - (const vec3 v, const vec3 u)
+{
+    __asm vec4_subtract __retVal.xyz, v, u;
 }
 
-void __operator -= (inout mat3 m, const mat3 n) {
-    m[0] -= n[0];
-    m[1] -= n[1];
-    m[2] -= n[2];
+vec3 __operator * (const vec3 v, const vec3 u)
+{
+    __asm vec4_multiply __retVal.xyz, v, u;
 }
 
-vec3 __operator * (const mat3 m, const vec3 v) {
-    return vec3 (
-        v.x * m[0].x + v.y * m[1].x + v.z * m[2].x,
-        v.x * m[0].y + v.y * m[1].y + v.z * m[2].y,
-        v.x * m[0].z + v.y * m[1].z + v.z * m[2].z
-    );
+vec3 __operator / (const vec3 v, const vec3 u)
+{
+   vec3 w; // = 1 / u
+   __asm float_rcp w.x, u.x;
+   __asm float_rcp w.y, u.y;
+   __asm float_rcp w.z, u.z;
+   __asm vec4_multiply __retVal.xyz, v, w;
 }
 
-mat3 __operator * (const mat3 m, const mat3 n) {
-    return mat3 (m * n[0], m * n[1], m * n[2]);
+
+//// Basic vec4 operators
+
+vec4 __operator + (const vec4 v, const vec4 u)
+{
+   __asm vec4_add __retVal, v, u;
 }
 
-void __operator *= (inout mat3 m, const mat3 n) {
-    m = m * n;
+vec4 __operator - (const vec4 v, const vec4 u)
+{
+    __asm vec4_subtract __retVal, v, u;
 }
 
-void __operator /= (inout mat3 m, const mat3 n) {
-    m[0] /= n[0];
-    m[1] /= n[1];
-    m[2] /= n[2];
+vec4 __operator * (const vec4 v, const vec4 u)
+{
+    __asm vec4_multiply __retVal, v, u;
 }
 
-void __operator += (inout mat4 m, const mat4 n) {
-    m[0] += n[0];
-    m[1] += n[1];
-    m[2] += n[2];
-    m[3] += n[3];
+vec4 __operator / (const vec4 v, const vec4 u)
+{
+   vec4 w; // = 1 / u
+   __asm float_rcp w.x, u.x;
+   __asm float_rcp w.y, u.y;
+   __asm float_rcp w.z, u.z;
+   __asm float_rcp w.w, u.w;
+   __asm vec4_multiply __retVal, v, w;
 }
 
-void __operator -= (inout mat4 m, const mat4 n) {
-    m[0] -= n[0];
-    m[1] -= n[1];
-    m[2] -= n[2];
-    m[3] -= n[3];
+
+
+
+//// Basic vec2/float operators
+
+vec2 __operator + (const float a, const vec2 u)
+{
+   __asm vec4_add __retVal.xy, a.xx, u.xy;
 }
 
-vec4 __operator * (const mat4 m, const vec4 v) {
-    return vec4 (
-        v.x * m[0].x + v.y * m[1].x + v.z * m[2].x + v.w * m[3].x,
-        v.x * m[0].y + v.y * m[1].y + v.z * m[2].y + v.w * m[3].y,
-        v.x * m[0].z + v.y * m[1].z + v.z * m[2].z + v.w * m[3].z,
-        v.x * m[0].w + v.y * m[1].w + v.z * m[2].w + v.w * m[3].w
-    );
+vec2 __operator + (const vec2 v, const float b)
+{
+   __asm vec4_add __retVal.xy, v.xy, b.xx;
 }
 
-mat4 __operator * (const mat4 m, const mat4 n) {
-    return mat4 (m * n[0], m * n[1], m * n[2], m * n[3]);
+vec2 __operator - (const float a, const vec2 u)
+{
+   __asm vec4_subtract __retVal.xy, a.xx, u.xy;
 }
 
-void __operator *= (inout mat4 m, const mat4 n) {
-    m = m * n;
+vec2 __operator - (const vec2 v, const float b)
+{
+   __asm vec4_subtract __retVal.xy, v.xy, b.xx;
 }
 
-void __operator /= (inout mat4 m, const mat4 n) {
-    m[0] /= n[0];
-    m[1] /= n[1];
-    m[2] /= n[2];
-    m[3] /= n[3];
+vec2 __operator * (const float a, const vec2 u)
+{
+   __asm vec4_multiply __retVal.xy, a.xx, u.xy;
 }
 
-void __operator += (inout vec2 v, const float a) {
-    v.x += a;
-    v.y += a;
+vec2 __operator * (const vec2 v, const float b)
+{
+   __asm vec4_multiply __retVal.xy, v.xy, b.xx;
 }
 
-void __operator -= (inout vec2 v, const float a) {
-    v.x -= a;
-    v.y -= a;
+vec2 __operator / (const float a, const vec2 u)
+{
+   vec2 invU;
+   __asm float_rcp invU.x, u.x;
+   __asm float_rcp invU.y, u.y;
+   __asm vec4_multiply __retVal.xy, a.xx, invU.xy;
 }
 
-void __operator *= (inout vec2 v, const float a) {
-    v.x *= a;
-    v.y *= a;
+vec2 __operator / (const vec2 v, const float b)
+{
+   float invB;
+   __asm float_rcp invB, b;
+   __asm vec4_multiply __retVal.xy, v.xy, invB.xx;
 }
 
-void __operator /= (inout vec2 v, const float a) {
-    v.x /= a;
-    v.y /= a;
+
+//// Basic vec3/float operators
+
+vec3 __operator + (const float a, const vec3 u)
+{
+   __asm vec4_add __retVal.xyz, a.xxx, u.xyz;
 }
 
-void __operator += (inout vec3 v, const float a) {
-    v.x += a;
-    v.y += a;
-    v.z += a;
+vec3 __operator + (const vec3 v, const float b)
+{
+   __asm vec4_add __retVal.xyz, v.xyz, b.xxx;
 }
 
-void __operator -= (inout vec3 v, const float a) {
-    v.x -= a;
-    v.y -= a;
-    v.z -= a;
+vec3 __operator - (const float a, const vec3 u)
+{
+   __asm vec4_subtract __retVal.xyz, a.xxx, u.xyz;
 }
 
-void __operator *= (inout vec3 v, const float a) {
-    v.x *= a;
-    v.y *= a;
-    v.z *= a;
+vec3 __operator - (const vec3 v, const float b)
+{
+   __asm vec4_subtract __retVal.xyz, v.xyz, b.xxx;
 }
 
-void __operator /= (inout vec3 v, const float a) {
-    v.x /= a;
-    v.y /= a;
-    v.z /= a;
+vec3 __operator * (const float a, const vec3 u)
+{
+   __asm vec4_multiply __retVal.xyz, a.xxx, u.xyz;
 }
 
-void __operator += (inout vec4 v, const float a) {
-    v.x += a;
-    v.y += a;
-    v.z += a;
-    v.w += a;
+vec3 __operator * (const vec3 v, const float b)
+{
+   __asm vec4_multiply __retVal.xyz, v.xyz, b.xxx;
 }
 
-void __operator -= (inout vec4 v, const float a) {
-    v.x -= a;
-    v.y -= a;
-    v.z -= a;
-    v.w -= a;
+vec3 __operator / (const float a, const vec3 u)
+{
+   vec3 invU;
+   __asm float_rcp invU.x, u.x;
+   __asm float_rcp invU.y, u.y;
+   __asm float_rcp invU.z, u.z;
+   __asm vec4_multiply __retVal.xyz, a.xxx, invU.xyz;
 }
 
-void __operator *= (inout vec4 v, const float a) {
-    v.x *= a;
-    v.y *= a;
-    v.z *= a;
-    v.w *= a;
+vec3 __operator / (const vec3 v, const float b)
+{
+   float invB;
+   __asm float_rcp invB, b;
+   __asm vec4_multiply __retVal.xyz, v.xyz, invB.xxx;
 }
 
-void __operator /= (inout vec4 v, const float a) {
-    v.x /= a;
-    v.y /= a;
-    v.z /= a;
-    v.w /= a;
+
+//// Basic vec4/float operators
+
+vec4 __operator + (const float a, const vec4 u)
+{
+   __asm vec4_add __retVal, a.xxxx, u;
 }
 
-void __operator += (inout mat2 m, const float a) {
-    m[0] += a;
-    m[1] += a;
+vec4 __operator + (const vec4 v, const float b)
+{
+   __asm vec4_add __retVal, v, b.xxxx;
 }
 
-void __operator -= (inout mat2 m, const float a) {
-    m[0] -= a;
-    m[1] -= a;
+vec4 __operator - (const float a, const vec4 u)
+{
+   __asm vec4_subtract __retVal, a.xxxx, u;
 }
 
-void __operator *= (inout mat2 m, const float a) {
-    m[0] *= a;
-    m[1] *= a;
+vec4 __operator - (const vec4 v, const float b)
+{
+   __asm vec4_subtract __retVal, v, b.xxxx;
 }
 
-void __operator /= (inout mat2 m, const float a) {
-    m[0] /= a;
-    m[1] /= a;
+vec4 __operator * (const float a, const vec4 u)
+{
+   __asm vec4_multiply __retVal, a.xxxx, u;
 }
 
-void __operator += (inout mat3 m, const float a) {
-    m[0] += a;
-    m[1] += a;
-    m[2] += a;
+vec4 __operator * (const vec4 v, const float b)
+{
+   __asm vec4_multiply __retVal, v, b.xxxx;
 }
 
-void __operator -= (inout mat3 m, const float a) {
-    m[0] -= a;
-    m[1] -= a;
-    m[2] -= a;
+vec4 __operator / (const float a, const vec4 u)
+{
+   vec4 invU;
+   __asm float_rcp invU.x, u.x;
+   __asm float_rcp invU.y, u.y;
+   __asm float_rcp invU.z, u.z;
+   __asm float_rcp invU.w, u.w;
+   __asm vec4_multiply __retVal, a.xxxx, invU;
 }
 
-void __operator *= (inout mat3 m, const float a) {
-    m[0] *= a;
-    m[1] *= a;
-    m[2] *= a;
+vec4 __operator / (const vec4 v, const float b)
+{
+   float invB;
+   __asm float_rcp invB, b;
+   __asm vec4_multiply __retVal, v, invB.xxxx;
 }
 
-void __operator /= (inout mat3 m, const float a) {
-    m[0] /= a;
-    m[1] /= a;
-    m[2] /= a;
+
+
+//// Basic ivec2/int operators
+
+ivec2 __operator + (const int a, const ivec2 u)
+{
+   __retVal = ivec2(a) + u;
 }
 
-void __operator += (inout mat4 m, const float a) {
-    m[0] += a;
-    m[1] += a;
-    m[2] += a;
-    m[3] += a;
+ivec2 __operator + (const ivec2 v, const int b)
+{
+   __retVal = v + ivec2(b);
 }
 
-void __operator -= (inout mat4 m, const float a) {
-    m[0] -= a;
-    m[1] -= a;
-    m[2] -= a;
-    m[3] -= a;
+ivec2 __operator - (const int a, const ivec2 u)
+{
+   __retVal = ivec2(a) - u;
 }
 
-void __operator *= (inout mat4 m, const float a) {
-    m[0] *= a;
-    m[1] *= a;
-    m[2] *= a;
-    m[3] *= a;
+ivec2 __operator - (const ivec2 v, const int b)
+{
+   __retVal = v - ivec2(b);
 }
 
-void __operator /= (inout mat4 m, const float a) {
-    m[0] /= a;
-    m[1] /= a;
-    m[2] /= a;
-    m[3] /= a;
+ivec2 __operator * (const int a, const ivec2 u)
+{
+   __retVal = ivec2(a) * u;
 }
 
-vec2 __operator * (const vec2 v, const mat2 m) {
-    return vec2 (
-        v.x * m[0].x + v.y * m[0].y,
-        v.x * m[1].x + v.y * m[1].y
-    );
+ivec2 __operator * (const ivec2 v, const int b)
+{
+   __retVal = v * ivec2(b);
 }
 
-void __operator *= (inout vec2 v, const mat2 m) {
-    v = v * m;
+ivec2 __operator / (const int a, const ivec2 u)
+{
+   __retVal = ivec2(a) / u;
 }
 
-vec3 __operator * (const vec3 v, const mat3 m) {
-    return vec3 (
-        v.x * m[0].x + v.y * m[0].y + v.z * m[0].z,
-        v.x * m[1].x + v.y * m[1].y + v.z * m[1].z,
-        v.x * m[2].x + v.y * m[2].y + v.z * m[2].z
-    );
+ivec2 __operator / (const ivec2 v, const int b)
+{
+   __retVal = v / ivec2(b);
 }
 
-void __operator *= (inout vec3 v, const mat3 m) {
-    v = v * m;
+
+//// Basic ivec3/int operators
+
+ivec3 __operator + (const int a, const ivec3 u)
+{
+   __retVal = ivec3(a) + u;
 }
 
-vec4 __operator * (const vec4 v, const mat4 m) {
-    return vec4 (
-        v.x * m[0].x + v.y * m[0].y + v.z * m[0].z + v.w * m[0].w,
-        v.x * m[1].x + v.y * m[1].y + v.z * m[1].z + v.w * m[1].w,
-        v.x * m[2].x + v.y * m[2].y + v.z * m[2].z + v.w * m[2].w,
-        v.x * m[3].x + v.y * m[3].y + v.z * m[3].z + v.w * m[3].w
-    );
+ivec3 __operator + (const ivec3 v, const int b)
+{
+   __retVal = v + ivec3(b);
 }
 
-void __operator *= (inout vec4 v, const mat4 m) {
-    v = v * m;
+ivec3 __operator - (const int a, const ivec3 u)
+{
+   __retVal = ivec3(a) - u;
 }
 
-float __operator - (const float a, const float b) {
-    float c;
-    __asm float_negate c, b;
-    __asm float_add    c, a, c;
-    return c;
+ivec3 __operator - (const ivec3 v, const int b)
+{
+   __retVal = v - ivec3(b);
 }
 
-int __operator + (const int a, const int b) {
-    float x, y;
-    int c;
-    __asm int_to_float x, a;
-    __asm int_to_float y, b;
-    __asm float_add    x, x, y;
-    __asm float_to_int c, x;
-    return c;
+ivec3 __operator * (const int a, const ivec3 u)
+{
+   __retVal = ivec3(a) * u;
 }
 
-int __operator - (const int a, const int b) {
-    float x, y;
-    int c;
-    __asm int_to_float x, a;
-    __asm int_to_float y, b;
-    __asm float_negate y, y;
-    __asm float_add    x, x, y;
-    __asm float_to_int c, x;
-    return c;
+ivec3 __operator * (const ivec3 v, const int b)
+{
+   __retVal = v * ivec3(b);
 }
 
-int __operator * (const int a, const int b) {
-    float x, y;
-    int c;
-    __asm int_to_float   x, a;
-    __asm int_to_float   y, b;
-    __asm float_multiply x, x, y;
-    __asm float_to_int   c, x;
-    return c;
+ivec3 __operator / (const int a, const ivec3 u)
+{
+   __retVal = ivec3(a) / u;
 }
 
-int __operator / (const int a, const int b) {
-    float x, y;
-    int c;
-    __asm int_to_float x, a;
-    __asm int_to_float y, b;
-    __asm float_divide x, x, y;
-    __asm float_to_int c, x;
-    return c;
+ivec3 __operator / (const ivec3 v, const int b)
+{
+   __retVal = v / ivec3(b);
+}
+
+
+//// Basic ivec4/int operators
+
+ivec4 __operator + (const int a, const ivec4 u)
+{
+   __retVal = ivec4(a) + u;
+}
+
+ivec4 __operator + (const ivec4 v, const int b)
+{
+   __retVal = v + ivec4(b);
+}
+
+ivec4 __operator - (const int a, const ivec4 u)
+{
+   __retVal = ivec4(a) - u;
+}
+
+ivec4 __operator - (const ivec4 v, const int b)
+{
+   __retVal = v - ivec4(b);
+}
+
+ivec4 __operator * (const int a, const ivec4 u)
+{
+   __retVal = ivec4(a) * u;
+}
+
+ivec4 __operator * (const ivec4 v, const int b)
+{
+   __retVal = v * ivec4(b);
+}
+
+ivec4 __operator / (const int a, const ivec4 u)
+{
+   __retVal = ivec4(a) / u;
+}
+
+ivec4 __operator / (const ivec4 v, const int b)
+{
+   __retVal = v / ivec4(b);
+}
+
+
+
+
+//// Unary negation operator
+
+int __operator - (const int a)
+{
+   __asm vec4_negate __retVal.x, a;
+}
+
+ivec2 __operator - (const ivec2 v)
+{
+   __asm vec4_negate __retVal, v;
+}
+
+ivec3 __operator - (const ivec3 v)
+{
+   __asm vec4_negate __retVal, v;
+}
+
+ivec4 __operator - (const ivec4 v)
+{
+   __asm vec4_negate __retVal, v;
 }
 
-vec2 __operator + (const vec2 v, const vec2 u) {
-    return vec2 (v.x + u.x, v.y + u.y);
+float __operator - (const float a)
+{
+   __asm vec4_negate __retVal.x, a;
 }
 
-vec2 __operator - (const vec2 v, const vec2 u) {
-    return vec2 (v.x - u.x, v.y - u.y);
+vec2 __operator - (const vec2 v)
+{
+   __asm vec4_negate __retVal.xy, v.xy;
 }
 
-vec2 __operator * (const vec2 v, const vec2 u) {
-    return vec2 (v.x * u.x, v.y * u.y);
+vec3 __operator - (const vec3 v)
+{
+   __asm vec4_negate __retVal.xyz, v.xyz;
 }
 
-vec2 __operator / (const vec2 v, const vec2 u) {
-    return vec2 (v.x / u.x, v.y / u.y);
+vec4 __operator - (const vec4 v)
+{
+   __asm vec4_negate __retVal, v;
 }
 
-vec3 __operator + (const vec3 v, const vec3 u) {
-    return vec3 (v.x + u.x, v.y + u.y, v.z + u.z);
+mat2 __operator - (const mat2 m)
+{
+   __retVal[0] = -m[0];
+   __retVal[1] = -m[1];
 }
 
-vec3 __operator - (const vec3 v, const vec3 u) {
-    return vec3 (v.x - u.x, v.y - u.y, v.z - u.z);
+mat3 __operator - (const mat3 m)
+{
+   __retVal[0] = -m[0];
+   __retVal[1] = -m[1];
+   __retVal[2] = -m[2];
 }
 
-vec3 __operator * (const vec3 v, const vec3 u) {
-    return vec3 (v.x * u.x, v.y * u.y, v.z * u.z);
+mat4 __operator - (const mat4 m)
+{
+   __retVal[0] = -m[0];
+   __retVal[1] = -m[1];
+   __retVal[2] = -m[2];
+   __retVal[3] = -m[3];
 }
 
-vec3 __operator / (const vec3 v, const vec3 u) {
-    return vec3 (v.x / u.x, v.y / u.y, v.z / u.z);
+
+
+//// dot product
+
+float dot(const float a, const float b)
+{
+   __retVal = a * b;
 }
 
-vec4 __operator + (const vec4 v, const vec4 u) {
-    return vec4 (v.x + u.x, v.y + u.y, v.z + u.z, v.w + u.w);
+float dot(const vec2 a, const vec2 b)
+{
+   __retVal = a.x * b.x + a.y * b.y;
 }
 
-vec4 __operator - (const vec4 v, const vec4 u) {
-    return vec4 (v.x - u.x, v.y - u.y, v.z - u.z, v.w - u.w);
+float dot(const vec3 a, const vec3 b)
+{
+    __asm vec3_dot __retVal, a, b;
 }
 
-vec4 __operator * (const vec4 v, const vec4 u) {
-    return vec4 (v.x * u.x, v.y * u.y, v.z * u.z, v.w * u.w);
+float dot(const vec4 a, const vec4 b)
+{
+    __asm vec4_dot __retVal, a, b;
 }
 
-vec4 __operator / (const vec4 v, const vec4 u) {
-    return vec4 (v.x / u.x, v.y / u.y, v.z / u.z, v.w / u.w);
+
+
+//// int assignment operators
+
+void __operator += (inout int a, const int b)
+{
+   __asm vec4_add a, a, b;
 }
 
-ivec2 __operator + (const ivec2 v, const ivec2 u) {
-    return ivec2 (v.x + u.x, v.y + u.y);
+void __operator -= (inout int a, const int b)
+{
+   __asm vec4_subtract a, a, b;
 }
 
-ivec2 __operator - (const ivec2 v, const ivec2 u) {
-    return ivec2 (v.x - u.x, v.y - u.y);
+void __operator *= (inout int a, const int b)
+{
+   __asm vec4_multiply a, a, b;
 }
 
-ivec2 __operator * (const ivec2 v, const ivec2 u) {
-    return ivec2 (v.x * u.x, v.y * u.y);
+// Integer divide-assign: a = a / b, evaluated as a * (1 / b).
+// The reciprocal and multiply are floating-point instructions, so the
+// product is converted back to an integer value before it is stored;
+// without that step the fractional part would be left in 'a'.
+void __operator /= (inout int a, const int b)
+{
+   float invB, q;
+   __asm float_rcp invB, b;
+   __asm vec4_multiply q, a, invB;
+   __asm float_to_int a, q;
 }
 
-ivec2 __operator / (const ivec2 v, const ivec2 u) {
-    return ivec2 (v.x / u.x, v.y / u.y);
+
+//// ivec2 assignment operators
+
+void __operator += (inout ivec2 v, const ivec2 u)
+{
+   __asm vec4_add v, v, u;
 }
 
-ivec3 __operator + (const ivec3 v, const ivec3 u) {
-    return ivec3 (v.x + u.x, v.y + u.y, v.z + u.z);
+void __operator -= (inout ivec2 v, const ivec2 u)
+{
+   __asm vec4_subtract v, v, u;
 }
 
-ivec3 __operator - (const ivec3 v, const ivec3 u) {
-    return ivec3 (v.x - u.x, v.y - u.y, v.z - u.z);
+void __operator *= (inout ivec2 v, const ivec2 u)
+{
+   __asm vec4_multiply v, v, u;
 }
 
-ivec3 __operator * (const ivec3 v, const ivec3 u) {
-    return ivec3 (v.x * u.x, v.y * u.y, v.z * u.z);
+// Component-wise integer divide-assign: v = v / u, evaluated as
+// v * (1 / u) followed by a float-to-int conversion.
+void __operator /= (inout ivec2 v, const ivec2 u)
+{
+   ivec2 inv, z;
+   __asm float_rcp inv.x, u.x;
+   __asm float_rcp inv.y, u.y;
+   __asm vec4_multiply z, v, inv;
+   // This operator returns void, so the result must land in the inout
+   // operand 'v' rather than in __retVal.
+   __asm float_to_int v, z;
 }
 
-ivec3 __operator / (const ivec3 v, const ivec3 u) {
-    return ivec3 (v.x / u.x, v.y / u.y, v.z / u.z);
+
+//// ivec3 assignment operators
+
+void __operator += (inout ivec3 v, const ivec3 u)
+{
+   __asm vec4_add v, v, u;
 }
 
-ivec4 __operator + (const ivec4 v, const ivec4 u) {
-    return ivec4 (v.x + u.x, v.y + u.y, v.z + u.z, v.w + u.w);
+void __operator -= (inout ivec3 v, const ivec3 u)
+{
+   __asm vec4_subtract v, v, u;
 }
 
-ivec4 __operator - (const ivec4 v, const ivec4 u) {
-    return ivec4 (v.x - u.x, v.y - u.y, v.z - u.z, v.w - u.w);
+void __operator *= (inout ivec3 v, const ivec3 u)
+{
+   __asm vec4_multiply v, v, u;
 }
 
-ivec4 __operator * (const ivec4 v, const ivec4 u) {
-    return ivec4 (v.x * u.x, v.y * u.y, v.z * u.z, v.w * u.w);
+// Component-wise integer divide-assign: v = v / u, evaluated as
+// v * (1 / u) followed by a float-to-int conversion.
+void __operator /= (inout ivec3 v, const ivec3 u)
+{
+   ivec3 inv, z;
+   __asm float_rcp inv.x, u.x;
+   __asm float_rcp inv.y, u.y;
+   // the z reciprocal is required too, otherwise v.z is multiplied by
+   // an uninitialized value
+   __asm float_rcp inv.z, u.z;
+   __asm vec4_multiply z, v, inv;
+   // This operator returns void, so the result must land in the inout
+   // operand 'v' rather than in __retVal.
+   __asm float_to_int v, z;
 }
 
-ivec4 __operator / (const ivec4 v, const ivec4 u) {
-    return ivec4 (v.x / u.x, v.y / u.y, v.z / u.z, v.w / u.w);
+
+//// ivec4 assignment operators
+
+void __operator += (inout ivec4 v, const ivec4 u)
+{
+   __asm vec4_add v, v, u;
 }
 
-mat2 __operator + (const mat2 m, const mat2 n) {
-    return mat2 (m[0] + n[0], m[1] + n[1]);
+void __operator -= (inout ivec4 v, const ivec4 u)
+{
+   __asm vec4_subtract v, v, u;
 }
 
-mat2 __operator - (const mat2 m, const mat2 n) {
-    return mat2 (m[0] - n[0], m[1] - n[1]);
+void __operator *= (inout ivec4 v, const ivec4 u)
+{
+   __asm vec4_multiply v, v, u;
 }
 
-mat2 __operator / (const mat2 m, const mat2 n) {
-    return mat2 (m[0] / n[0], m[1] / n[1]);
+// Component-wise integer divide-assign: v = v / u, evaluated as
+// v * (1 / u) followed by a float-to-int conversion.
+void __operator /= (inout ivec4 v, const ivec4 u)
+{
+   ivec4 inv, z;
+   __asm float_rcp inv.x, u.x;
+   __asm float_rcp inv.y, u.y;
+   // z and w reciprocals are required too, otherwise v.z / v.w are
+   // multiplied by uninitialized values
+   __asm float_rcp inv.z, u.z;
+   __asm float_rcp inv.w, u.w;
+   __asm vec4_multiply z, v, inv;
+   // This operator returns void, so the result must land in the inout
+   // operand 'v' rather than in __retVal.
+   __asm float_to_int v, z;
 }
 
-mat3 __operator + (const mat3 m, const mat3 n) {
-    return mat3 (m[0] + n[0], m[1] + n[1], m[2] + n[2]);
+
+//// float assignment operators
+
+void __operator += (inout float a, const float b)
+{
+    __asm vec4_add a.x, a.x, b;
 }
 
-mat3 __operator - (const mat3 m, const mat3 n) {
-    return mat3 (m[0] - n[0], m[1] - n[1], m[2] - n[2]);
+void __operator -= (inout float a, const float b)
+{
+    __asm vec4_subtract a.x, a, b;
 }
 
-mat3 __operator / (const mat3 m, const mat3 n) {
-    return mat3 (m[0] / n[0], m[1] / n[1], m[2] / n[2]);
+void __operator *= (inout float a, const float b)
+{
+    __asm vec4_multiply a.x, a, b;
 }
 
-mat4 __operator + (const mat4 m, const mat4 n) {
-    return mat4 (m[0] + n[0], m[1] + n[1], m[2] + n[2], m[3] + n[3]);
+void __operator /= (inout float a, const float b)
+{
+   float w; // = 1 / b
+   __asm float_rcp w.x, b;
+   __asm vec4_multiply a.x, a, w;
 }
 
-mat4 __operator - (const mat4 m, const mat4 n) {
-    return mat4 (m[0] - n[0], m[1] - n[1], m[2] - n[2], m[3] - n[3]);
+
+//// vec2 assignment operators
+
+void __operator += (inout vec2 v, const vec2 u)
+{
+   __asm vec4_add v.xy, v.xy, u.xy;
 }
 
-mat4 __operator / (const mat4 m, const mat4 n) {
-    return mat4 (m[0] / n[0], m[1] / n[1], m[2] / n[2], m[3] / n[3]);
+void __operator -= (inout vec2 v, const vec2 u)
+{
+   __asm vec4_subtract v.xy, v.xy, u.xy;
 }
 
-vec2 __operator + (const float a, const vec2 u) {
-    return vec2 (a + u.x, a + u.y);
+void __operator *= (inout vec2 v, const vec2 u)
+{
+   __asm vec4_multiply v.xy, v.xy, u.xy;
 }
 
-vec2 __operator + (const vec2 v, const float b) {
-    return vec2 (v.x + b, v.y + b);
+void __operator /= (inout vec2 v, const vec2 u)
+{
+   vec2 w;
+   __asm float_rcp w.x, u.x;
+   __asm float_rcp w.y, u.y;
+   __asm vec4_multiply v.xy, v.xy, w.xy;
 }
 
-vec2 __operator - (const float a, const vec2 u) {
-    return vec2 (a - u.x, a - u.y);
+
+//// vec3 assignment operators
+
+void __operator += (inout vec3 v, const vec3 u)
+{
+   __asm vec4_add v.xyz, v, u;
 }
 
-vec2 __operator - (const vec2 v, const float b) {
-    return vec2 (v.x - b, v.y - b);
+void __operator -= (inout vec3 v, const vec3 u)
+{
+   __asm vec4_subtract v.xyz, v, u;
 }
 
-vec2 __operator * (const float a, const vec2 u) {
-    return vec2 (a * u.x, a * u.y);
+void __operator *= (inout vec3 v, const vec3 u)
+{
+   __asm vec4_multiply v.xyz, v, u;
 }
 
-vec2 __operator * (const vec2 v, const float b) {
-    return vec2 (v.x * b, v.y * b);
+void __operator /= (inout vec3 v, const vec3 u)
+{
+   vec3 w;
+   __asm float_rcp w.x, u.x;
+   __asm float_rcp w.y, u.y;
+   __asm float_rcp w.z, u.z;
+   __asm vec4_multiply v.xyz, v.xyz, w.xyz;
 }
 
-vec2 __operator / (const float a, const vec2 u) {
-    return vec2 (a / u.x, a / u.y);
+
+//// vec4 assignment operators
+
+void __operator += (inout vec4 v, const vec4 u)
+{
+   __asm vec4_add v, v, u;
 }
 
-vec2 __operator / (const vec2 v, const float b) {
-    return vec2 (v.x / b, v.y / b);
+void __operator -= (inout vec4 v, const vec4 u)
+{
+   __asm vec4_subtract v, v, u;
 }
 
-vec3 __operator + (const float a, const vec3 u) {
-    return vec3 (a + u.x, a + u.y, a + u.z);
+void __operator *= (inout vec4 v, const vec4 u)
+{
+   __asm vec4_multiply v, v, u;
 }
 
-vec3 __operator + (const vec3 v, const float b) {
-    return vec3 (v.x + b, v.y + b, v.z + b);
+void __operator /= (inout vec4 v, const vec4 u)
+{
+   vec4 w;
+   __asm float_rcp w.x, u.x;
+   __asm float_rcp w.y, u.y;
+   __asm float_rcp w.z, u.z;
+   __asm float_rcp w.w, u.w;
+   __asm vec4_multiply v, v, w;
 }
 
-vec3 __operator - (const float a, const vec3 u) {
-    return vec3 (a - u.x, a - u.y, a - u.z);
+
+
+//// ivec2/int assignment operators
+
+void __operator += (inout ivec2 v, const int a)
+{
+   __asm vec4_add v.xy, v.xy, a.xx;
 }
 
-vec3 __operator - (const vec3 v, const float b) {
-    return vec3 (v.x - b, v.y - b, v.z - b);
+void __operator -= (inout ivec2 v, const int a)
+{
+   __asm vec4_subtract v.xy, v.xy, a.xx;
 }
 
-vec3 __operator * (const float a, const vec3 u) {
-    return vec3 (a * u.x, a * u.y, a * u.z);
+// Scales both components of v by the scalar a (v = v * a).
+// A single masked multiply does the whole job; the scalar is
+// replicated with the .xx swizzle. Multiplying the components again
+// afterwards would yield v * a * a.
+void __operator *= (inout ivec2 v, const int a)
+{
+   __asm vec4_multiply v.xy, v.xy, a.xx;
 }
 
-vec3 __operator * (const vec3 v, const float b) {
-    return vec3 (v.x * b, v.y * b, v.z * b);
+// Divides each component of v by the scalar a, in place, by applying
+// the scalar int /= operator to v.x and v.y in turn.
+void __operator /= (inout ivec2 v, const int a)
+{
+// XXX rcp
+// NOTE(review): presumably a reminder that this could use one float_rcp
+// plus a multiply instead of a divide per component - confirm.
+    v.x /= a;
+    v.y /= a;
 }
 
-vec3 __operator / (const float a, const vec3 u) {
-    return vec3 (a / u.x, a / u.y, a / u.z);
+
+//// ivec3/int assignment operators
+
+void __operator += (inout ivec3 v, const int a)
+{
+   __asm vec4_add v.xyz, v.xyz, a.xxx;
 }
 
-vec3 __operator / (const vec3 v, const float b) {
-    return vec3 (v.x / b, v.y / b, v.z / b);
+void __operator -= (inout ivec3 v, const int a)
+{
+   __asm vec4_subtract v.xyz, v.xyz, a.xxx;
 }
 
-vec4 __operator + (const float a, const vec4 u) {
-    return vec4 (a + u.x, a + u.y, a + u.z, a + u.w);
+void __operator *= (inout ivec3 v, const int a)
+{
+   __asm vec4_multiply v.xyz, v.xyz, a.xxx;
 }
 
-vec4 __operator + (const vec4 v, const float b) {
-    return vec4 (v.x + b, v.y + b, v.z + b, v.w + b);
+// Divides each component of v by the scalar a, in place, by applying
+// the scalar int /= operator to v.x, v.y and v.z in turn.
+void __operator /= (inout ivec3 v, const int a)
+{
+   // XXX rcp
+   // NOTE(review): presumably a reminder that this could use one
+   // float_rcp plus a multiply instead of a divide per component - confirm.
+    v.x /= a;
+    v.y /= a;
+    v.z /= a;
 }
 
-vec4 __operator - (const float a, const vec4 u) {
-    return vec4 (a - u.x, a - u.y, a - u.z, a - u.w);
+
+//// ivec4/int assignment operators
+
+void __operator += (inout ivec4 v, const int a)
+{
+   __asm vec4_add v, v, a.xxxx;
 }
 
-vec4 __operator - (const vec4 v, const float b) {
-    return vec4 (v.x - b, v.y - b, v.z - b, v.w - b);
+void __operator -= (inout ivec4 v, const int a)
+{
+   __asm vec4_subtract v, v, a.xxxx;
 }
 
-vec4 __operator * (const float a, const vec4 u) {
-    return vec4 (a * u.x, a * u.y, a * u.z, a * u.w);
+void __operator *= (inout ivec4 v, const int a)
+{
+   __asm vec4_multiply v, v, a.xxxx;
 }
 
-vec4 __operator * (const vec4 v, const float b) {
-    return vec4 (v.x * b, v.y * b, v.z * b, v.w * b);
+// Divides each component of v by the scalar a, in place, by applying
+// the scalar int /= operator to v.x, v.y, v.z and v.w in turn.
+void __operator /= (inout ivec4 v, const int a)
+{
+    v.x /= a;
+    v.y /= a;
+    v.z /= a;
+    v.w /= a;
 }
 
-vec4 __operator / (const float a, const vec4 u) {
-    return vec4 (a / u.x, a / u.y, a / u.z, a / u.w);
+
+
+//// vec2/float assignment operators
+
+void __operator += (inout vec2 v, const float a)
+{
+   __asm vec4_add v.xy, v, a.xx;
 }
 
-vec4 __operator / (const vec4 v, const float b) {
-    return vec4 (v.x / b, v.y / b, v.z / b, v.w / b);
+void __operator -= (inout vec2 v, const float a)
+{
+   __asm vec4_subtract v.xy, v, a.xx;
 }
 
-mat2 __operator + (const float a, const mat2 n) {
-    return mat2 (a + n[0], a + n[1]);
+void __operator *= (inout vec2 v, const float a)
+{
+   __asm vec4_multiply v.xy, v, a.xx;
 }
 
-mat2 __operator + (const mat2 m, const float b) {
-    return mat2 (m[0] + b, m[1] + b);
+// Divides both components of v by the scalar a, in place.
+// The division is done as a multiply by the reciprocal 1/a, so the
+// multiply must use invA (not a itself, which would scale v by a).
+void __operator /= (inout vec2 v, const float a)
+{
+   float invA;
+   __asm float_rcp invA, a;
+   __asm vec4_multiply v.xy, v.xy, invA.xx;
 }
 
-mat2 __operator - (const float a, const mat2 n) {
-    return mat2 (a - n[0], a - n[1]);
+
+//// vec3/float assignment operators
+
+void __operator += (inout vec3 v, const float a)
+{
+   __asm vec4_add v.xyz, v, a.xxx;
 }
 
-mat2 __operator - (const mat2 m, const float b) {
-    return mat2 (m[0] - b, m[1] - b);
+void __operator -= (inout vec3 v, const float a)
+{
+   __asm vec4_subtract v.xyz, v, a.xxx;
 }
 
-mat2 __operator * (const float a, const mat2 n) {
-    return mat2 (a * n[0], a * n[1]);
+void __operator *= (inout vec3 v, const float a)
+{
+   __asm vec4_multiply v.xyz, v, a.xxx;
 }
 
-mat2 __operator * (const mat2 m, const float b) {
-    return mat2 (m[0] * b, m[1] * b);
+// Divides the three components of v by the scalar a, in place.
+// The division is done as a multiply by the reciprocal 1/a, so the
+// multiply must use invA (not a itself, which would scale v by a).
+void __operator /= (inout vec3 v, const float a)
+{
+   float invA;
+   __asm float_rcp invA, a;
+   __asm vec4_multiply v.xyz, v.xyz, invA.xxx;
 }
 
-mat2 __operator / (const float a, const mat2 n) {
-    return mat2 (a / n[0], a / n[1]);
+
+//// vec4/float assignment operators
+
+void __operator += (inout vec4 v, const float a)
+{
+   __asm vec4_add v, v, a.xxxx;
 }
 
-mat2 __operator / (const mat2 m, const float b) {
-    return mat2 (m[0] / b, m[1] / b);
+void __operator -= (inout vec4 v, const float a)
+{
+   __asm vec4_subtract v, v, a.xxxx;
 }
 
-mat3 __operator + (const float a, const mat3 n) {
-    return mat3 (a + n[0], a + n[1], a + n[2]);
+void __operator *= (inout vec4 v, const float a)
+{
+   __asm vec4_multiply v, v, a.xxxx;
 }
 
-mat3 __operator + (const mat3 m, const float b) {
-    return mat3 (m[0] + b, m[1] + b, m[2] + b);
+// Divides all four components of v by the scalar a, in place.
+// The division is done as a multiply by the reciprocal 1/a, so the
+// multiply must use invA (not a itself, which would scale v by a).
+void __operator /= (inout vec4 v, const float a)
+{
+   float invA;
+   __asm float_rcp invA, a;
+   __asm vec4_multiply v, v, invA.xxxx;
 }
 
-mat3 __operator - (const float a, const mat3 n) {
-    return mat3 (a - n[0], a - n[1], a - n[2]);
+
+
+
+
+//// Basic mat2 operations
+
+mat2 __operator + (const mat2 m, const mat2 n)
+{
+   __retVal[0] = m[0] + n[0];
+   __retVal[1] = m[1] + n[1];
 }
 
-mat3 __operator - (const mat3 m, const float b) {
-    return mat3 (m[0] - b, m[1] - b, m[2] - b);
+mat2 __operator - (const mat2 m, const mat2 n)
+{
+   __retVal[0] = m[0] - n[0];
+   __retVal[1] = m[1] - n[1];
 }
 
-mat3 __operator * (const float a, const mat3 n) {
-    return mat3 (a * n[0], a * n[1], a * n[2]);
+// Linear-algebraic product m * n of two mat2 values.
+// mRow0 / mRow1 collect the x and y components of m[0] and m[1], i.e.
+// the rows of m when m[i] is treated as column i (GLSL convention).
+// Element __retVal[c].r is the dot product of row r of m with n[c].
+mat2 __operator * (const mat2 m, const mat2 n)
+{
+   vec2 mRow0, mRow1;
+   mRow0.x = m[0].x;
+   mRow0.y = m[1].x;
+   mRow1.x = m[0].y;
+   mRow1.y = m[1].y;
+   __retVal[0].x = dot(mRow0, n[0]);
+   __retVal[1].x = dot(mRow0, n[1]);
+   __retVal[0].y = dot(mRow1, n[0]);
+   __retVal[1].y = dot(mRow1, n[1]);
 }
 
-mat3 __operator * (const mat3 m, const float b) {
-    return mat3 (m[0] * b, m[1] * b, m[2] * b);
+mat2 __operator / (const mat2 m, const mat2 n)
+{
+   __retVal[0] = m[0] / n[0];
+   __retVal[1] = m[1] / n[1];
 }
 
-mat3 __operator / (const float a, const mat3 n) {
-    return mat3 (a / n[0], a / n[1], a / n[2]);
+
+//// Basic mat3 operations
+
+mat3 __operator + (const mat3 m, const mat3 n)
+{
+   __retVal[0] = m[0] + n[0];
+   __retVal[1] = m[1] + n[1];
+   __retVal[2] = m[2] + n[2];
 }
 
-mat3 __operator / (const mat3 m, const float b) {
-    return mat3 (m[0] / b, m[1] / b, m[2] / b);
+mat3 __operator - (const mat3 m, const mat3 n)
+{
+   __retVal[0] = m[0] - n[0];
+   __retVal[1] = m[1] - n[1];
+   __retVal[2] = m[2] - n[2];
+}
+
+// Linear-algebraic product m * n of two mat3 values.
+// Each sub-block gathers one row of m (the x, y or z components of
+// m[0..2]) into a temporary and writes that row of the result:
+// __retVal[c].r = dot(row r of m, n[c]).
+mat3 __operator * (const mat3 m, const mat3 n)
+{
+   // sub-blocks to reduce register usage
+   {
+      // result row 0 (the .x components)
+      vec3 mRow0;
+      mRow0.x = m[0].x;
+      mRow0.y = m[1].x;
+      mRow0.z = m[2].x;
+      __retVal[0].x = dot(mRow0, n[0]);
+      __retVal[1].x = dot(mRow0, n[1]);
+      __retVal[2].x = dot(mRow0, n[2]);
+   }
+   {
+      // result row 1 (the .y components)
+      vec3 mRow1;
+      mRow1.x = m[0].y;
+      mRow1.y = m[1].y;
+      mRow1.z = m[2].y;
+      __retVal[0].y = dot(mRow1, n[0]);
+      __retVal[1].y = dot(mRow1, n[1]);
+      __retVal[2].y = dot(mRow1, n[2]);
+   }
+   {
+      // result row 2 (the .z components)
+      vec3 mRow2;
+      mRow2.x = m[0].z;
+      mRow2.y = m[1].z;
+      mRow2.z = m[2].z;
+      __retVal[0].z = dot(mRow2, n[0]);
+      __retVal[1].z = dot(mRow2, n[1]);
+      __retVal[2].z = dot(mRow2, n[2]);
+   }
+}
+
+mat3 __operator / (const mat3 m, const mat3 n)
+{
+    __retVal[0] = m[0] / n[0];
+    __retVal[1] = m[1] / n[1];
+    __retVal[2] = m[2] / n[2];
+}
+
+
+//// Basic mat4 operations
+
+mat4 __operator + (const mat4 m, const mat4 n)
+{
+   __retVal[0] = m[0] + n[0];
+   __retVal[1] = m[1] + n[1];
+   __retVal[2] = m[2] + n[2];
+   __retVal[3] = m[3] + n[3];
+}
+
+mat4 __operator - (const mat4 m, const mat4 n)
+{
+   __retVal[0] = m[0] - n[0];
+   __retVal[1] = m[1] - n[1];
+   __retVal[2] = m[2] - n[2];
+   __retVal[3] = m[3] - n[3];
+}
+
+// Linear-algebraic product m * n of two mat4 values.
+// Each sub-block gathers one row of m (the x, y, z or w components of
+// m[0..3]) into a temporary and writes that row of the result:
+// __retVal[c].r = dot(row r of m, n[c]).
+mat4 __operator * (const mat4 m, const mat4 n)
+{
+   // sub-blocks to reduce temporary usage
+   {
+      // result row 0 (the .x components)
+      vec4 mRow0;
+      mRow0.x = m[0].x;
+      mRow0.y = m[1].x;
+      mRow0.z = m[2].x;
+      mRow0.w = m[3].x;
+      __retVal[0].x = dot(mRow0, n[0]);
+      __retVal[1].x = dot(mRow0, n[1]);
+      __retVal[2].x = dot(mRow0, n[2]);
+      __retVal[3].x = dot(mRow0, n[3]);
+   }
+   {
+      // result row 1 (the .y components)
+      vec4 mRow1;
+      mRow1.x = m[0].y;
+      mRow1.y = m[1].y;
+      mRow1.z = m[2].y;
+      mRow1.w = m[3].y;
+      __retVal[0].y = dot(mRow1, n[0]);
+      __retVal[1].y = dot(mRow1, n[1]);
+      __retVal[2].y = dot(mRow1, n[2]);
+      __retVal[3].y = dot(mRow1, n[3]);
+   }
+   {
+      // result row 2 (the .z components)
+      vec4 mRow2;
+      mRow2.x = m[0].z;
+      mRow2.y = m[1].z;
+      mRow2.z = m[2].z;
+      mRow2.w = m[3].z;
+      __retVal[0].z = dot(mRow2, n[0]);
+      __retVal[1].z = dot(mRow2, n[1]);
+      __retVal[2].z = dot(mRow2, n[2]);
+      __retVal[3].z = dot(mRow2, n[3]);
+   }
+   {
+      // result row 3 (the .w components)
+      vec4 mRow3;
+      mRow3.x = m[0].w;
+      mRow3.y = m[1].w;
+      mRow3.z = m[2].w;
+      mRow3.w = m[3].w;
+      __retVal[0].w = dot(mRow3, n[0]);
+      __retVal[1].w = dot(mRow3, n[1]);
+      __retVal[2].w = dot(mRow3, n[2]);
+      __retVal[3].w = dot(mRow3, n[3]);
+   }
+}
+
+mat4 __operator / (const mat4 m, const mat4 n)
+{
+    __retVal[0] = m[0] / n[0];
+    __retVal[1] = m[1] / n[1];
+    __retVal[2] = m[2] / n[2];
+    __retVal[3] = m[3] / n[3];
+}
+
+
+//// mat2/float operations
+
+mat2 __operator + (const float a, const mat2 n)
+{
+   __retVal[0] = a + n[0];
+   __retVal[1] = a + n[1];
+}
+
+mat2 __operator + (const mat2 m, const float b)
+{
+   __retVal[0] = m[0] + b;
+   __retVal[1] = m[1] + b;
 }
 
-mat4 __operator + (const float a, const mat4 n) {
-    return mat4 (a + n[0], a + n[1], a + n[2], a + n[3]);
+mat2 __operator - (const float a, const mat2 n)
+{
+   __retVal[0] = a - n[0];
+   __retVal[1] = a - n[1];
 }
 
-mat4 __operator + (const mat4 m, const float b) {
-    return mat4 (m[0] + b, m[1] + b, m[2] + b, m[3] + b);
+mat2 __operator - (const mat2 m, const float b)
+{
+   __retVal[0] = m[0] - b;
+   __retVal[1] = m[1] - b;
+}
+
+mat2 __operator * (const float a, const mat2 n)
+{
+   __retVal[0] = a * n[0];
+   __retVal[1] = a * n[1];
 }
 
-mat4 __operator - (const float a, const mat4 n) {
-    return mat4 (a - n[0], a - n[1], a - n[2], a - n[3]);
+mat2 __operator * (const mat2 m, const float b)
+{
+   __retVal[0] = m[0] * b;
+   __retVal[1] = m[1] * b;
+}
+
+mat2 __operator / (const float a, const mat2 n)
+{
+   __retVal[0] = a / n[0];
+   __retVal[1] = a / n[1];
 }
 
-mat4 __operator - (const mat4 m, const float b) {
-    return mat4 (m[0] - b, m[1] - b, m[2] - b, m[3] - b);
+mat2 __operator / (const mat2 m, const float b)
+{
+   __retVal[0] = m[0] / b;
+   __retVal[1] = m[1] / b;
 }
 
-mat4 __operator * (const float a, const mat4 n) {
-    return mat4 (a * n[0], a * n[1], a * n[2], a * n[3]);
+
+//// mat3/float operations
+
+mat3 __operator + (const float a, const mat3 n)
+{
+   __retVal[0] = a + n[0];
+   __retVal[1] = a + n[1];
+   __retVal[2] = a + n[2];
 }
 
-mat4 __operator * (const mat4 m, const float b) {
-    return mat4 (m[0] * b, m[1] * b, m[2] * b, m[3] * b);
+mat3 __operator + (const mat3 m, const float b)
+{
+   __retVal[0] = m[0] + b;
+   __retVal[1] = m[1] + b;
+   __retVal[2] = m[2] + b;
 }
 
-mat4 __operator / (const float a, const mat4 n) {
-    return mat4 (a / n[0], a / n[1], a / n[2], a / n[3]);
+mat3 __operator - (const float a, const mat3 n)
+{
+   __retVal[0] = a - n[0];
+   __retVal[1] = a - n[1];
+   __retVal[2] = a - n[2];
 }
 
-mat4 __operator / (const mat4 m, const float b) {
-    return mat4 (m[0] / b, m[1] / b, m[2] / b, m[3] / b);
+mat3 __operator - (const mat3 m, const float b)
+{
+   __retVal[0] = m[0] - b;
+   __retVal[1] = m[1] - b;
+   __retVal[2] = m[2] - b;
 }
 
-ivec2 __operator + (const int a, const ivec2 u) {
-    return ivec2 (a) + u;
+mat3 __operator * (const float a, const mat3 n)
+{
+   __retVal[0] = a * n[0];
+   __retVal[1] = a * n[1];
+   __retVal[2] = a * n[2];
 }
 
-ivec2 __operator + (const ivec2 v, const int b) {
-    return v + ivec2 (b);
+mat3 __operator * (const mat3 m, const float b)
+{
+   __retVal[0] = m[0] * b;
+   __retVal[1] = m[1] * b;
+   __retVal[2] = m[2] * b;
 }
 
-ivec2 __operator - (const int a, const ivec2 u) {
-    return ivec2 (a) - u;
+mat3 __operator / (const float a, const mat3 n)
+{
+   __retVal[0] = a / n[0];
+   __retVal[1] = a / n[1];
+   __retVal[2] = a / n[2];
 }
 
-ivec2 __operator - (const ivec2 v, const int b) {
-    return v - ivec2 (b);
+mat3 __operator / (const mat3 m, const float b)
+{
+   __retVal[0] = m[0] / b;
+   __retVal[1] = m[1] / b;
+   __retVal[2] = m[2] / b;
 }
 
-ivec2 __operator * (const int a, const ivec2 u) {
-    return ivec2 (a) * u;
+
+//// mat4/float operations
+
+mat4 __operator + (const float a, const mat4 n)
+{
+   __retVal[0] = a + n[0];
+   __retVal[1] = a + n[1];
+   __retVal[2] = a + n[2];
+   __retVal[3] = a + n[3];
 }
 
-ivec2 __operator * (const ivec2 v, const int b) {
-    return v * ivec2 (b);
+mat4 __operator + (const mat4 m, const float b)
+{
+   __retVal[0] = m[0] + b;
+   __retVal[1] = m[1] + b;
+   __retVal[2] = m[2] + b;
+   __retVal[3] = m[3] + b;
 }
 
-ivec2 __operator / (const int a, const ivec2 u) {
-    return ivec2 (a) / u;
+mat4 __operator - (const float a, const mat4 n)
+{
+   __retVal[0] = a - n[0];
+   __retVal[1] = a - n[1];
+   __retVal[2] = a - n[2];
+   __retVal[3] = a - n[3];
 }
 
-ivec2 __operator / (const ivec2 v, const int b) {
-    return v / ivec2 (b);
+mat4 __operator - (const mat4 m, const float b)
+{
+   __retVal[0] = m[0] - b;
+   __retVal[1] = m[1] - b;
+   __retVal[2] = m[2] - b;
+   __retVal[3] = m[3] - b;
 }
 
-ivec3 __operator + (const int a, const ivec3 u) {
-    return ivec3 (a) + u;
+mat4 __operator * (const float a, const mat4 n)
+{
+   __retVal[0] = a * n[0];
+   __retVal[1] = a * n[1];
+   __retVal[2] = a * n[2];
+   __retVal[3] = a * n[3];
+}
+
+mat4 __operator * (const mat4 m, const float b)
+{
+   __retVal[0] = m[0] * b;
+   __retVal[1] = m[1] * b;
+   __retVal[2] = m[2] * b;
+   __retVal[3] = m[3] * b;
+}
+
+mat4 __operator / (const float a, const mat4 n)
+{
+   __retVal[0] = a / n[0];
+   __retVal[1] = a / n[1];
+   __retVal[2] = a / n[2];
+   __retVal[3] = a / n[3];
+}
+
+mat4 __operator / (const mat4 m, const float b)
+{
+   __retVal[0] = m[0] / b;
+   __retVal[1] = m[1] / b;
+   __retVal[2] = m[2] / b;
+   __retVal[3] = m[3] / b;
+}
+
+
+
+//// matrix / vector products
+
+// Matrix * column-vector product for mat2.
+// r0 / r1 gather the x and y components of m[0] and m[1] (the rows of m
+// when m[i] is column i); each result component is the dot product of
+// one of those rows with v.
+vec2 __operator * (const mat2 m, const vec2 v)
+{
+   vec2 r0, r1;
+   r0.x = m[0].x;
+   r0.y = m[1].x;
+   r1.x = m[0].y;
+   r1.y = m[1].y;
+   __retVal.x = dot(r0, v);
+   __retVal.y = dot(r1, v);
+}
+
+// Row-vector * matrix product for mat2: component i of the result is
+// the dot product of v with m[i], so no row extraction is needed.
+vec2 __operator * (const vec2 v, const mat2 m)
+{
+   __retVal.x = dot(v, m[0]);
+   __retVal.y = dot(v, m[1]);
+}
+
+vec3 __operator * (const mat3 m, const vec3 v)   // matrix * column vector
+{
+   {
+      vec3 r0;   // row 0 of m: the .x components of m[0..2]
+      r0.x = m[0].x;
+      r0.y = m[1].x;
+      r0.z = m[2].x;
+      __asm vec3_dot __retVal.x, r0, v;   // destination first, then the two sources
+   }
+   {
+      vec3 r1;   // row 1 of m: the .y components of m[0..2]
+      r1.x = m[0].y;
+      r1.y = m[1].y;
+      r1.z = m[2].y;
+      __asm vec3_dot __retVal.y, r1, v;
+   }
+   {
+      vec3 r2;   // row 2 of m: the .z components of m[0..2]
+      r2.x = m[0].z;
+      r2.y = m[1].z;
+      r2.z = m[2].z;
+      __asm vec3_dot __retVal.z, r2, v;
+   }
+}
+
+vec3 __operator * (const vec3 v, const mat3 m)   // row vector * matrix
+{
+   __retVal.x = dot(v, m[0]);   // result component i = v . m[i]; no row gathering needed
+   __retVal.y = dot(v, m[1]);
+   __retVal.z = dot(v, m[2]);
+}
+
+vec4 __operator * (const mat4 m, const vec4 v)   // matrix * column vector
+{
+   // gather each row of m from m[0..3], then dot that row with v
+   {
+      vec4 r0;   // row 0 of m: the .x components of m[0..3]
+      r0.x = m[0].x;
+      r0.y = m[1].x;
+      r0.z = m[2].x;
+      r0.w = m[3].x;
+      __asm vec4_dot __retVal.x, r0, v;   // destination first, then the two sources
+   }
+   {
+      vec4 r1;   // row 1 of m: the .y components of m[0..3]
+      r1.x = m[0].y;
+      r1.y = m[1].y;
+      r1.z = m[2].y;
+      r1.w = m[3].y;
+      __asm vec4_dot __retVal.y, r1, v;
+   }
+   {
+      vec4 r2;   // row 2 of m: the .z components of m[0..3]
+      r2.x = m[0].z;
+      r2.y = m[1].z;
+      r2.z = m[2].z;
+      r2.w = m[3].z;
+      __asm vec4_dot __retVal.z, r2, v;
+   }
+   {
+      vec4 r3;   // row 3 of m: the .w components of m[0..3]
+      r3.x = m[0].w;
+      r3.y = m[1].w;
+      r3.z = m[2].w;
+      r3.w = m[3].w;
+      __asm vec4_dot __retVal.w, r3, v;
+   }
+}
+
+vec4 __operator * (const vec4 v, const mat4 m)
+{
+   // row vector * matrix: result component i is v dotted with m[i]
+   __retVal.x = dot(v, m[0]);
+   __retVal.y = dot(v, m[1]);
+   __retVal.z = dot(v, m[2]);
+   __retVal.w = dot(v, m[3]);
+}
+
+
+
+//// mat2 assignment operators
+
+void __operator += (inout mat2 m, const mat2 n)
+{
+    m[0] += n[0];
+    m[1] += n[1];
 }
 
-ivec3 __operator + (const ivec3 v, const int b) {
-    return v + ivec3 (b);
+void __operator -= (inout mat2 m, const mat2 n)
+{
+    m[0] -= n[0];
+    m[1] -= n[1];
 }
 
-ivec3 __operator - (const int a, const ivec3 u) {
-    return ivec3 (a) - u;
+void __operator *= (inout mat2 m, const mat2 n)
+{
+    m = m * n;
 }
 
-ivec3 __operator - (const ivec3 v, const int b) {
-    return v - ivec3 (b);
+void __operator /= (inout mat2 m, const mat2 n)
+{
+    m[0] /= n[0];
+    m[1] /= n[1];
 }
 
-ivec3 __operator * (const int a, const ivec3 u) {
-    return ivec3 (a) * u;
+
+//// mat3 assignment operators
+
+void __operator += (inout mat3 m, const mat3 n)
+{
+    m[0] += n[0];
+    m[1] += n[1];
+    m[2] += n[2];
 }
 
-ivec3 __operator * (const ivec3 v, const int b) {
-    return v * ivec3 (b);
+void __operator -= (inout mat3 m, const mat3 n)
+{
+    m[0] -= n[0];
+    m[1] -= n[1];
+    m[2] -= n[2];
 }
 
-ivec3 __operator / (const int a, const ivec3 u) {
-    return ivec3 (a) / u;
+void __operator *= (inout mat3 m, const mat3 n)
+{
+    m = m * n;
 }
 
-ivec3 __operator / (const ivec3 v, const int b) {
-    return v / ivec3 (b);
+void __operator /= (inout mat3 m, const mat3 n)
+{
+    m[0] /= n[0];
+    m[1] /= n[1];
+    m[2] /= n[2];
 }
 
-ivec4 __operator + (const int a, const ivec4 u) {
-    return ivec4 (a) + u;
+
+//// mat4 assignment operators
+
+void __operator += (inout mat4 m, const mat4 n)
+{
+    m[0] += n[0];
+    m[1] += n[1];
+    m[2] += n[2];
+    m[3] += n[3];
 }
 
-ivec4 __operator + (const ivec4 v, const int b) {
-    return v + ivec4 (b);
+void __operator -= (inout mat4 m, const mat4 n) {
+    m[0] -= n[0];
+    m[1] -= n[1];
+    m[2] -= n[2];
+    m[3] -= n[3];
+}
+
+void __operator *= (inout mat4 m, const mat4 n)
+{
+    m = m * n;
 }
 
-ivec4 __operator - (const int a, const ivec4 u) {
-    return ivec4 (a) - u;
+void __operator /= (inout mat4 m, const mat4 n)
+{
+    m[0] /= n[0];
+    m[1] /= n[1];
+    m[2] /= n[2];
+    m[3] /= n[3];
+}
+
+
+//// mat2/float assignment operators
+
+void __operator += (inout mat2 m, const float a) {
+    m[0] += a;
+    m[1] += a;
 }
 
-ivec4 __operator - (const ivec4 v, const int b) {
-    return v - ivec4 (b);
+void __operator -= (inout mat2 m, const float a) {
+    m[0] -= a;
+    m[1] -= a;
 }
 
-ivec4 __operator * (const int a, const ivec4 u) {
-    return ivec4 (a) * u;
+void __operator *= (inout mat2 m, const float a) {
+    m[0] *= a;
+    m[1] *= a;
 }
 
-ivec4 __operator * (const ivec4 v, const int b) {
-    return v * ivec4 (b);
+void __operator /= (inout mat2 m, const float a) {
+    m[0] /= a;
+    m[1] /= a;
 }
 
-ivec4 __operator / (const int a, const ivec4 u) {
-    return ivec4 (a) / u;
+
+//// mat3/float assignment operators
+
+void __operator += (inout mat3 m, const float a) {
+    m[0] += a;
+    m[1] += a;
+    m[2] += a;
 }
 
-ivec4 __operator / (const ivec4 v, const int b) {
-    return v / ivec4 (b);
+void __operator -= (inout mat3 m, const float a) {
+    m[0] -= a;
+    m[1] -= a;
+    m[2] -= a;
 }
 
-vec2 __operator - (const vec2 v) {
-    return vec2 (-v.x, -v.y);
+void __operator *= (inout mat3 m, const float a) {
+    m[0] *= a;
+    m[1] *= a;
+    m[2] *= a;
 }
 
-vec3 __operator - (const vec3 v) {
-    return vec3 (-v.x, -v.y, -v.z);
+void __operator /= (inout mat3 m, const float a) {
+    m[0] /= a;
+    m[1] /= a;
+    m[2] /= a;
 }
 
-vec4 __operator - (const vec4 v) {
-    return vec4 (-v.x, -v.y, -v.z, -v.w);
+
+//// mat4/float assignment operators
+
+void __operator += (inout mat4 m, const float a) {
+    m[0] += a;
+    m[1] += a;
+    m[2] += a;
+    m[3] += a;
 }
 
-ivec2 __operator - (const ivec2 v) {
-    return ivec2 (-v.x, -v.y);
+void __operator -= (inout mat4 m, const float a) {
+    m[0] -= a;
+    m[1] -= a;
+    m[2] -= a;
+    m[3] -= a;
 }
 
-ivec3 __operator - (const ivec3 v) {
-    return ivec3 (-v.x, -v.y, -v.z);
+void __operator *= (inout mat4 m, const float a) {
+    m[0] *= a;
+    m[1] *= a;
+    m[2] *= a;
+    m[3] *= a;
 }
 
-ivec4 __operator - (const ivec4 v) {
-    return ivec4 (-v.x, -v.y, -v.z, -v.w);
+void __operator /= (inout mat4 m, const float a) {
+    m[0] /= a;
+    m[1] /= a;
+    m[2] /= a;
+    m[3] /= a;
 }
 
-mat2 __operator - (const mat2 m) {
-    return mat2 (-m[0], -m[1]);
+
+
+//// vec/mat assignment operators
+
+void __operator *= (inout vec2 v, const mat2 m)
+{
+    v = v * m;
 }
 
-mat3 __operator - (const mat3 m) {
-    return mat3 (-m[0], -m[1], -m[2]);
+void __operator *= (inout vec3 v, const mat3 m)
+{
+    v = v * m;
 }
 
-mat4 __operator - (const mat4 m) {
-    return mat4 (-m[0], -m[1], -m[2], -m[3]);
+void __operator *= (inout vec4 v, const mat4 m)
+{
+    v = v * m;
 }
 
-void __operator -- (inout float a) {
-    a -= 1.0;
+
+
+//// pre-decrement operators
+
+int __operator --(inout int a)
+{
+    a = a - 1;
+   __retVal = a;
 }
 
-void __operator -- (inout int a) {
-    a -= 1;
+ivec2 __operator --(inout ivec2 v)
+{
+   v = v - ivec2(1);
+   __retVal = v;
 }
 
-void __operator -- (inout vec2 v) {
-    --v.x;
-    --v.y;
+ivec3 __operator --(inout ivec3 v)
+{
+   v = v - ivec3(1);
+   __retVal = v;
 }
 
-void __operator -- (inout vec3 v) {
-    --v.x;
-    --v.y;
-    --v.z;
+ivec4 __operator --(inout ivec4 v)
+{
+   v = v - ivec4(1);
+   __retVal = v;
 }
 
-void __operator -- (inout vec4 v) {
-    --v.x;
-    --v.y;
-    --v.z;
-    --v.w;
+
+float __operator --(inout float a)
+{
+   a = a - 1.0;
+   __retVal = a;
 }
 
-void __operator -- (inout ivec2 v) {
-    --v.x;
-    --v.y;
+vec2 __operator --(inout vec2 v)
+{
+   v = v - vec2(1.0);
+   __retVal = v;
 }
 
-void __operator -- (inout ivec3 v) {
-    --v.x;
-    --v.y;
-    --v.z;
+vec3 __operator --(inout vec3 v)
+{
+   v = v - vec3(1.0);
+   __retVal = v;
 }
 
-void __operator -- (inout ivec4 v) {
-    --v.x;
-    --v.y;
-    --v.z;
-    --v.w;
+vec4 __operator --(inout vec4 v)
+{
+   v = v - vec4(1.0);
+   __retVal = v;
 }
 
-void __operator -- (inout mat2 m) {
-    --m[0];
-    --m[1];
+
+mat2 __operator --(inout mat2 m)
+{
+   m[0] = m[0] - vec2(1.0);
+   m[1] = m[1] - vec2(1.0);
+   __retVal = m;
 }
 
-void __operator -- (inout mat3 m) {
-    --m[0];
-    --m[1];
-    --m[2];
+mat3 __operator --(inout mat3 m)
+{
+   m[0] = m[0] - vec3(1.0);
+   m[1] = m[1] - vec3(1.0);
+   m[2] = m[2] - vec3(1.0);
+   __retVal = m;
 }
 
-void __operator -- (inout mat4 m) {
-    --m[0];
-    --m[1];
-    --m[2];
-    --m[3];
+mat4 __operator --(inout mat4 m)
+{
+   m[0] = m[0] - vec4(1.0);
+   m[1] = m[1] - vec4(1.0);
+   m[2] = m[2] - vec4(1.0);
+   m[3] = m[3] - vec4(1.0);
+   __retVal = m;
 }
 
-void __operator ++ (inout float a) {
-    a += 1.0;
+
+//// pre-increment operators
+
+int __operator ++(inout int a)
+{
+    a = a + 1;
+    __retVal = a;
 }
 
-void __operator ++ (inout int a) {
-    a += 1;
+ivec2 __operator ++(inout ivec2 v)
+{
+   v = v + ivec2(1);
+   __retVal = v;
 }
 
-void __operator ++ (inout vec2 v) {
-    ++v.x;
-    ++v.y;
+ivec3 __operator ++(inout ivec3 v)
+{
+   v = v + ivec3(1);
+   __retVal = v;
 }
 
-void __operator ++ (inout vec3 v) {
-    ++v.x;
-    ++v.y;
-    ++v.z;
+ivec4 __operator ++(inout ivec4 v)
+{
+   v = v + ivec4(1);
+   __retVal = v;
 }
 
-void __operator ++ (inout vec4 v) {
-    ++v.x;
-    ++v.y;
-    ++v.z;
-    ++v.w;
+
+float __operator ++(inout float a)
+{
+    a = a + 1.0;
+    __retVal = a;
 }
 
-void __operator ++ (inout ivec2 v) {
-    ++v.x;
-    ++v.y;
+vec2 __operator ++(inout vec2 v)
+{
+   v = v + vec2(1.0);
+   __retVal = v;
 }
 
-void __operator ++ (inout ivec3 v) {
-    ++v.x;
-    ++v.y;
-    ++v.z;
+vec3 __operator ++(inout vec3 v)
+{
+   v = v + vec3(1.0);
+   __retVal = v;
 }
 
-void __operator ++ (inout ivec4 v) {
-    ++v.x;
-    ++v.y;
-    ++v.z;
-    ++v.w;
+vec4 __operator ++(inout vec4 v)
+{
+   v = v + vec4(1.0);
+   __retVal = v;
 }
 
-void __operator ++ (inout mat2 m) {
-    ++m[0];
-    ++m[1];
+
+mat2 __operator ++(inout mat2 m)
+{
+   m[0] = m[0] + vec2(1.0);
+   m[1] = m[1] + vec2(1.0);
+   __retVal = m;
 }
 
-void __operator ++ (inout mat3 m) {
-    ++m[0];
-    ++m[1];
-    ++m[2];
+mat3 __operator ++(inout mat3 m)
+{
+   m[0] = m[0] + vec3(1.0);
+   m[1] = m[1] + vec3(1.0);
+   m[2] = m[2] + vec3(1.0);
+   __retVal = m;
 }
 
-void __operator ++ (inout mat4 m) {
-    ++m[0];
-    ++m[1];
-    ++m[2];
-    ++m[3];
+mat4 __operator ++(inout mat4 m)
+{
+   m[0] = m[0] + vec4(1.0);
+   m[1] = m[1] + vec4(1.0);
+   m[2] = m[2] + vec4(1.0);
+   m[3] = m[3] + vec4(1.0);
+   __retVal = m;
 }
 
-//
-// NOTE: postfix increment and decrement operators take additional dummy int parameter to
-//       distinguish their prototypes from prefix ones.
-//
 
-float __operator -- (inout float a, const int) {
-    float b = a;
-    --a;
-    return b;
+
+//// post-decrement
+
+int __postDecr(inout int a)
+{
+   __retVal = a;
+   a = a - 1;
 }
 
-int __operator -- (inout int a, const int) {
-    int b = a;
-    --a;
-    return b;
+ivec2 __postDecr(inout ivec2 v)
+{
+   __retVal = v;
+   v = v - ivec2(1);
 }
 
-vec2 __operator -- (inout vec2 v, const int) {
-    return vec2 (v.x--, v.y--);
+ivec3 __postDecr(inout ivec3 v)
+{
+   __retVal = v;
+   v = v - ivec3(1);
 }
 
-vec3 __operator -- (inout vec3 v, const int) {
-    return vec3 (v.x--, v.y--, v.z--);
+ivec4 __postDecr(inout ivec4 v)
+{
+   __retVal = v;
+   v = v - ivec4(1);
 }
 
-vec4 __operator -- (inout vec4 v, const int) {
-    return vec4 (v.x--, v.y--, v.z--, v.w--);
+
+float __postDecr(inout float a)
+{
+   __retVal = a;
+   a = a - 1.0;
 }
 
-ivec2 __operator -- (inout ivec2 v, const int) {
-    return ivec2 (v.x--, v.y--);
+vec2 __postDecr(inout vec2 v)
+{
+   __retVal = v;
+   v = v - vec2(1.0);
 }
 
-ivec3 __operator -- (inout ivec3 v, const int) {
-    return ivec3 (v.x--, v.y--, v.z--);
+vec3 __postDecr(inout vec3 v)
+{
+   __retVal = v;
+   v = v - vec3(1.0);
 }
 
-ivec4 __operator -- (inout ivec4 v, const int) {
-    return ivec4 (v.x--, v.y--, v.z--, v.w--);
+vec4 __postDecr(inout vec4 v)
+{
+   __retVal = v;
+   v = v - vec4(1.0);
 }
 
-mat2 __operator -- (inout mat2 m, const int) {
-    return mat2 (m[0]--, m[1]--);
+
+mat2 __postDecr(inout mat2 m)
+{
+   __retVal = m;
+   m[0] = m[0] - vec2(1.0);
+   m[1] = m[1] - vec2(1.0);
 }
 
-mat3 __operator -- (inout mat3 m, const int) {
-    return mat3 (m[0]--, m[1]--, m[2]--);
+mat3 __postDecr(inout mat3 m)
+{
+   __retVal = m;
+   m[0] = m[0] - vec3(1.0);
+   m[1] = m[1] - vec3(1.0);
+   m[2] = m[2] - vec3(1.0);
 }
 
-mat4 __operator -- (inout mat4 m, const int) {
-    return mat4 (m[0]--, m[1]--, m[2]--, m[3]--);
+mat4 __postDecr(inout mat4 m)
+{
+   __retVal = m;
+   m[0] = m[0] - vec4(1.0);
+   m[1] = m[1] - vec4(1.0);
+   m[2] = m[2] - vec4(1.0);
+   m[3] = m[3] - vec4(1.0);
 }
 
-float __operator ++ (inout float a, const int) {
-    float b = a;
-    ++a;
-    return b;
+
+//// post-increment
+
+float __postIncr(inout float a)   // post-increment: yields the value a had on entry
+{
+   __retVal = a;   // capture the old value before a is modified
+   a = a + 1.0;   // float literal keeps both operands of + the same type
 }
 
-int __operator ++ (inout int a, const int) {
-    int b = a;
-    ++a;
-    return b;
+vec2 __postIncr(inout vec2 v)
+{
+   __retVal = v;
+   v = v + vec2(1.0);
 }
 
-vec2 __operator ++ (inout vec2 v, const int) {
-    return vec2 (v.x++, v.y++);
+vec3 __postIncr(inout vec3 v)
+{
+   __retVal = v;
+   v = v + vec3(1.0);
 }
 
-vec3 __operator ++ (inout vec3 v, const int) {
-    return vec3 (v.x++, v.y++, v.z++);
+vec4 __postIncr(inout vec4 v)
+{
+   __retVal = v;
+   v = v + vec4(1.0);
 }
 
-vec4 __operator ++ (inout vec4 v, const int) {
-    return vec4 (v.x++, v.y++, v.z++, v.w++);
+
+int __postIncr(inout int a)
+{
+   __retVal = a;
+   a = a + 1;
 }
 
-ivec2 __operator ++ (inout ivec2 v, const int) {
-    return ivec2 (v.x++, v.y++);
+ivec2 __postIncr(inout ivec2 v)
+{
+   __retVal = v;
+   v = v + ivec2(1);
 }
 
-ivec3 __operator ++ (inout ivec3 v, const int) {
-    return ivec3 (v.x++, v.y++, v.z++);
+ivec3 __postIncr(inout ivec3 v)
+{
+   __retVal = v;
+   v = v + ivec3(1);
 }
 
-ivec4 __operator ++ (inout ivec4 v, const int) {
-    return ivec4 (v.x++, v.y++, v.z++, v.w++);
+ivec4 __postIncr(inout ivec4 v)   // post-increment: yields the value v had on entry
+{
+   __retVal = v;   // capture the old value before v is modified
+   v = v + ivec4(1);   // addend must be an ivec4 to match v (was ivec3)
 }
 
-mat2 __operator ++ (inout mat2 m, const int) {
-    return mat2 (m[0]++, m[1]++);
+
+mat2 __postIncr(inout mat2 m)
+{
+   mat2 n = m;
+   m[0] = m[0] + vec2(1.0);
+   m[1] = m[1] + vec2(1.0);
+   return n;
 }
 
-mat3 __operator ++ (inout mat3 m, const int) {
-    return mat3 (m[0]++, m[1]++, m[2]++);
+mat3 __postIncr(inout mat3 m)
+{
+   mat3 n = m;
+   m[0] = m[0] + vec3(1.0);
+   m[1] = m[1] + vec3(1.0);
+   m[2] = m[2] + vec3(1.0);
+   return n;
 }
 
-mat4 __operator ++ (inout mat4 m, const int) {
-    return mat4 (m[0]++, m[1]++, m[2]++, m[3]++);
+mat4 __postIncr(inout mat4 m)
+{
+   mat4 n = m;
+   m[0] = m[0] + vec4(1.0);
+   m[1] = m[1] + vec4(1.0);
+   m[2] = m[2] + vec4(1.0);
+   m[3] = m[3] + vec4(1.0);
+   return n;
 }
 
-bool __operator < (const float a, const float b) {
-    bool c;
-    __asm float_less c, a, b;
-    return c;
+
+
+//// inequality operators
+
+
+// XXX are the inequality operators for floats/ints really needed????
+bool __operator < (const float a, const float b)
+{
+   __asm vec4_sgt __retVal.x, b, a;   // a < b is emitted as b > a: sources are passed swapped (presumably sgt = set-on-greater-than)
 }
 
+
 bool __operator < (const int a, const int b) {
     return float (a) < float (b);
 }
@@ -1546,25 +2513,25 @@ bool __operator <= (const int a, const int b) {
     return float (a) <= float (b);
 }
 
-bool __operator ^^ (const bool a, const bool b) {
-    return a != b;
-}
 
-//
-// These operators are handled internally by the compiler:
-//
-// bool __operator && (bool a, bool b) {
-//     return a ? b : false;
-// }
-// bool __operator || (bool a, bool b) {
-//     return a ? true : b;
-// }
-//
 
-bool __operator ! (const bool a) {
-    return a == false;
+bool __logicalNot(const bool a)
+{
+   if (a)
+      return false;
+   return true;
+}
+
+bool __logicalXor(const bool a, const bool b)
+{
+   // XXX   return a != b;
+   if (a)
+      return __logicalNot(b);
+   return b;
 }
 
+
+
 //
 // MESA-specific extension functions.
 //