summary refs log tree commit diff
diff options
context:
space:
mode:
author Ian Romanick <idr@us.ibm.com> 2008-02-11 15:27:13 -0800
committer Ian Romanick <idr@us.ibm.com> 2008-02-14 10:08:48 -0800
commit c2903679856856e3758ceb744cd7d91af9e2eb45 (patch)
tree 2140a3e63b0ce009b5331003576417ab6f028549
parent 0230c56ed0db16f007e4d1881554c7dbfa3ac3de (diff)
Vectorize a couple fetch functions
These C-coded vectorized functions are expected to be short-lived. They're basically a proof-of-concept for dynamically generated fetch routines.
-rw-r--r-- src/mesa/pipe/cell/spu/spu_vertex_fetch.c 46
1 files changed, 42 insertions, 4 deletions
diff --git a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c
index 6e86a919ce..ec10bb99df 100644
--- a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c
+++ b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c
@@ -46,6 +46,48 @@
static const vec_float4 defaults = { 0.0, 0.0, 0.0, 1.0 };
+/**
+ * Assemble a quadword from two adjacent quadword loads so that the
+ * result starts at an address that need not be 16-byte aligned.
+ *
+ * The low four bits of \c ptr select how far the first load is shifted
+ * left; the second load (from \c ptr + 16) is shifted right by the
+ * complementary amount and the two pieces are OR'd together.
+ * NOTE(review): this presumably relies on SPU quadword loads ignoring
+ * the low four address bits -- confirm against the SPU ISA.
+ *
+ * \param ptr  address of the first byte wanted.  The quadword at
+ *             \c ptr + 16 is always loaded, even when \c ptr is aligned.
+ * \return the combined quadword.
+ */
+static INLINE qword
+fetch_unaligned_qword(const void *ptr)
+{
+   const int shift = (unsigned) ptr & 0x0f;
+   /* Byte offsets go through a char pointer: arithmetic on void * is a
+    * GNU extension, not standard C.  The loads keep the const qualifier.
+    */
+   const qword x = *(const qword *) ptr;
+   const qword y = *(const qword *) ((const char *) ptr + 16);
+
+   return si_or((qword) spu_slqwbyte(x, shift),
+                (qword) spu_rlmaskqwbyte(y, shift - 16));
+}
+
+/**
+ * Fetch routine for the R32G32B32A32_FLOAT format: the quadword read
+ * from \c ptr is returned as-is, with no conversion applied.
+ */
+static qword
+fetch_R32G32B32A32_FLOAT(const void *ptr)
+{
+   const qword raw = fetch_unaligned_qword(ptr);
+
+   return raw;
+}
+
+
+/**
+ * Fetch routine for the R32G32B32A32_USCALED format: the quadword read
+ * from \c ptr is passed through si_cuflt with a scale argument of 0.
+ */
+static qword
+fetch_R32G32B32A32_USCALED(const void *ptr)
+{
+   const qword raw = fetch_unaligned_qword(ptr);
+
+   return si_cuflt(raw, 0);
+}
+
+
+/**
+ * Fetch routine for the R32G32B32A32_UNORM format: the quadword read
+ * from \c ptr is converted with si_cuflt and then multiplied by a
+ * normalization factor so the full unsigned 32-bit range maps onto
+ * [0, 1].
+ */
+static qword
+fetch_R32G32B32A32_UNORM(const void *ptr)
+{
+   qword x = si_cuflt(fetch_unaligned_qword(ptr), 0);
+   /* Channels are 32 bits wide, so normalize by the 32-bit maximum
+    * (2^32 - 1).  Dividing by 255 is only correct for 8-bit channels.
+    * In single precision this constant folds to exactly 2^-32.
+    */
+   vec_float4 scale = spu_splats(1.0f / 4294967295.0f);
+
+   return si_fm(x, (qword) scale);
+}
+
+
+/**
+ * Fetch routine for the R32G32B32A32_SSCALED format: the quadword read
+ * from \c ptr is passed through si_csflt with a scale argument of 0.
+ */
+static qword
+fetch_R32G32B32A32_SSCALED(const void *ptr)
+{
+   const qword raw = fetch_unaligned_qword(ptr);
+
+   return si_csflt(raw, 0);
+}
+
+
/**
* Fetch a float[4] vertex attribute from memory, doing format/type
* conversion as needed.
@@ -90,22 +132,18 @@ FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT )
FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT )
-FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT )
FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT )
FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT )
FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT )
-FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED )
FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED )
FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED )
FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED )
-FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED )
FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED )
FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED )
FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED )
-FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM )
FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM )
FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM )
FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM )