Merge branch 'upstream-gallium-0.1' into darktama-gallium-0.1

author: Ben Skeggs <skeggsb@gmail.com> 2008-02-13 14:21:23 +1100
committer: Ben Skeggs <skeggsb@gmail.com> 2008-02-13 14:21:23 +1100
commit: b397a2bb203c2b28b746af7828d9ad192cde0bc1 (patch)
tree: 77448bf1db79ae112f057717109aaa98e5696b15 /src/mesa/pipe
parent: 5ba3dbe2cc8a9af5cae01f45eaf497f834400170 (diff)
parent: e20e89e48287808068086ec148920dd89495d813 (diff)
30 files changed, 2213 insertions, 1358 deletions
diff --git a/src/mesa/pipe/cell/ppu/cell_state_fs.c b/src/mesa/pipe/cell/ppu/cell_state_fs.c
index 96a52273b0..3f46a87d18 100644
--- a/src/mesa/pipe/cell/ppu/cell_state_fs.c
+++ b/src/mesa/pipe/cell/ppu/cell_state_fs.c
@@ -67,7 +67,7 @@ cell_create_fs_state(struct pipe_context *pipe,
 #endif
 
 #ifdef MESA_LLVM
-   state->llvm_prog = gallivm_from_tgsi(state->shader.tokens, GALLIVM_FS);
+   state->llvm_prog = 0;
    if (!gallivm_global_cpu_engine()) {
       gallivm_cpu_engine_create(state->llvm_prog);
    }
diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c
index 5ca93aa615..9c31df1e3e 100644
--- a/src/mesa/pipe/draw/draw_vertex_shader.c
+++ b/src/mesa/pipe/draw/draw_vertex_shader.c
@@ -113,7 +113,15 @@ run_vertex_program(struct draw_context *draw,
    draw->vertex_fetch.fetch_func( draw, machine, elts, count );
 
    /* run shader */
-#if defined(__i386__) || defined(__386__)
+#ifdef MESA_LLVM
+   if (1) {
+   struct gallivm_prog  *prog  = draw->vertex_shader->llvm_prog;
+   gallivm_cpu_vs_exec(prog,
+                       machine->Inputs,
+                       machine->Outputs,
+                       machine->Consts);
+   } else
+#elif defined(__i386__) || defined(__386__)
    if (draw->use_sse) {
       /* SSE */
       /* cast away const */
@@ -212,13 +220,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw)
     */
    draw_update_vertex_fetch( draw );
 
-//   debug_printf( " q(%d) ", draw->vs.queue_nr );
-#ifdef MESA_LLVM
-   if (draw->vertex_shader->llvm_prog) {
-      draw_vertex_shader_queue_flush_llvm(draw);
-      return;
-   }
-#endif
+//   fprintf(stderr, " q(%d) ", draw->vs.queue_nr );
 
    /* run vertex shader on vertex cache entries, four per invokation */
    for (i = 0; i < draw->vs.queue_nr; i += 4) {
@@ -260,7 +262,13 @@ draw_create_vertex_shader(struct draw_context *draw,
    vs->state = shader;
 
 #ifdef MESA_LLVM
-   vs->llvm_prog = gallivm_from_tgsi(shader->tokens, GALLIVM_VS);
+   struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
+   gallivm_ir_set_layout(ir, GALLIVM_SOA);
+   gallivm_ir_set_components(ir, 4);
+   gallivm_ir_fill_from_tgsi(ir, shader->tokens);
+   vs->llvm_prog = gallivm_ir_compile(ir);
+   gallivm_ir_delete(ir);
+
    draw->engine = gallivm_global_cpu_engine();
    if (!draw->engine) {
       draw->engine = gallivm_cpu_engine_create(vs->llvm_prog);
@@ -296,7 +304,7 @@ draw_bind_vertex_shader(struct draw_context *draw,
    draw->vertex_shader = dvs;
    draw->num_vs_outputs = dvs->state->num_outputs;
 
-   /* specify the fragment program to interpret/execute */
+   /* specify the vertex program to interpret/execute */
    tgsi_exec_machine_init(&draw->machine,
                           draw->vertex_shader->state->tokens,
                           PIPE_MAX_SAMPLERS,
diff --git a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
deleted file mode 100644
index 63551c993e..0000000000
--- a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
-  * Authors:
-  *   Zack Rusin zack@tungstengraphics.com
-  */
-
-#include "pipe/p_util.h"
-#include "draw_private.h"
-#include "draw_context.h"
-
-#ifdef MESA_LLVM
-
-#include "pipe/llvm/gallivm.h"
-#include "pipe/p_shader_tokens.h"
-
-#define DBG 0
-
-static INLINE void
-fetch_attrib4(const void *ptr, enum pipe_format format, float attrib[4])
-{
-   /* defaults */
-   attrib[1] = 0.0;
-   attrib[2] = 0.0;
-   attrib[3] = 1.0;
-   switch (format) {
-   case PIPE_FORMAT_R32G32B32A32_FLOAT:
-      attrib[3] = ((float *) ptr)[3];
-      /* fall-through */
-   case PIPE_FORMAT_R32G32B32_FLOAT:
-      attrib[2] = ((float *) ptr)[2];
-      /* fall-through */
-   case PIPE_FORMAT_R32G32_FLOAT:
-      attrib[1] = ((float *) ptr)[1];
-      /* fall-through */
-   case PIPE_FORMAT_R32_FLOAT:
-      attrib[0] = ((float *) ptr)[0];
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * Fetch vertex attributes for 'count' vertices.
- */
-static INLINE
-void vertex_fetch(struct draw_context *draw,
-                  const unsigned elt,
-                  float (*inputs)[4])
-{
-   uint attr;
-
-   /* loop over vertex attributes (vertex shader inputs) */
-   for (attr = 0; attr < draw->vertex_shader->state->num_inputs; attr++) {
-
-      unsigned buf = draw->vertex_element[attr].vertex_buffer_index;
-      const void *src
-         = (const void *) ((const ubyte *) draw->user.vbuffer[buf]
-                           + draw->vertex_buffer[buf].buffer_offset
-                           + draw->vertex_element[attr].src_offset
-                           + elt * draw->vertex_buffer[buf].pitch);
-      fetch_attrib4(src, draw->vertex_element[attr].src_format, inputs[attr]);
-   }
-}
-
-static INLINE unsigned
-compute_clipmask(const float *clip, const float (*plane)[4], unsigned nr)
-{
-   unsigned mask = 0;
-   unsigned i;
-
-   for (i = 0; i < nr; i++) {
-      if (dot4(clip, plane[i]) < 0)
-         mask |= (1<<i);
-   }
-
-   return mask;
-}
-
-
-/**
- * Called by the draw module when the vertx cache needs to be flushed.
- * This involves running the vertex shader.
- */
-void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
-{
-   unsigned i;
-
-   struct vertex_header *dests[VS_QUEUE_LENGTH];
-   float                 inputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4] ALIGN16_ATTRIB;
-   float                 outputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4] ALIGN16_ATTRIB;
-   float (*consts)[4]          = (float (*)[4]) draw->user.constants;
-   struct gallivm_prog  *prog  = draw->vertex_shader->llvm_prog;
-   const float          *scale = draw->viewport.scale;
-   const float          *trans = draw->viewport.translate;
-   /* fetch the inputs */
-   for (i = 0; i < draw->vs.queue_nr; ++i) {
-      unsigned elt = draw->vs.queue[i].elt;
-      dests[i] = draw->vs.queue[i].dest;
-      vertex_fetch(draw, elt, inputs[i]);
-   }
-
-   /* batch execute the shaders on all the vertices */
-   gallivm_prog_exec(prog, inputs, outputs, consts,
-                     draw->vs.queue_nr,
-                     draw->vertex_shader->state->num_inputs,
-                     draw->vertex_shader->state->num_outputs);
-
-
-   /* store machine results */
-   for (int i = 0; i < draw->vs.queue_nr; ++i) {
-      unsigned slot;
-      float x, y, z, w;
-      struct vertex_header *vOut = draw->vs.queue[i].dest;
-      float (*dests)[4] = outputs[i];
-
-      /* Handle attr[0] (position) specially:
-       *
-       * XXX: Computing the clipmask should be done in the vertex
-       * program as a set of DP4 instructions appended to the
-       * user-provided code.
-       */
-      x = vOut->clip[0] = dests[0][0];
-      y = vOut->clip[1] = dests[0][1];
-      z = vOut->clip[2] = dests[0][2];
-      w = vOut->clip[3] = dests[0][3];
-#if DBG
-      debug_printf("output %d: %f %f %f %f\n", 0, x, y, z, w);
-#endif
-
-      vOut->clipmask = compute_clipmask(vOut->clip, draw->plane, draw->nr_planes);
-      vOut->edgeflag = 1;
-      /* divide by w */
-      w = 1.0f / w;
-      x *= w;
-      y *= w;
-      z *= w;
-
-      /* Viewport mapping */
-      vOut->data[0][0] = x * scale[0] + trans[0];
-      vOut->data[0][1] = y * scale[1] + trans[1];
-      vOut->data[0][2] = z * scale[2] + trans[2];
-      vOut->data[0][3] = w;
-
-      /* Remaining attributes are packed into sequential post-transform
-       * vertex attrib slots.
-       */
-      for (slot = 1; slot < draw->num_vs_outputs; slot++) {
-         vOut->data[slot][0] = dests[slot][0];
-         vOut->data[slot][1] = dests[slot][1];
-         vOut->data[slot][2] = dests[slot][2];
-         vOut->data[slot][3] = dests[slot][3];
-
-#if DBG
-         debug_printf("output %d: %f %f %f %f\n", slot,
-                vOut->data[slot][0],
-                vOut->data[slot][1],
-                vOut->data[slot][2],
-                vOut->data[slot][3]);
-#endif
-      }
-   } /* loop over vertices */
-
-   draw->vs.queue_nr = 0;
-}
-
-#endif /* MESA_LLVM */
diff --git a/src/mesa/pipe/draw/draw_wide_prims.c b/src/mesa/pipe/draw/draw_wide_prims.c
index 9759e7e2e8..4c7e279b20 100644
--- a/src/mesa/pipe/draw/draw_wide_prims.c
+++ b/src/mesa/pipe/draw/draw_wide_prims.c
@@ -76,7 +76,6 @@ static void passthrough_tri( struct draw_stage *stage,
 
 /**
  * Draw a wide line by drawing a quad (two triangles).
- * XXX still need line stipple.
  * XXX need to disable polygon stipple.
  */
 static void wide_line( struct draw_stage *stage,
@@ -103,12 +102,9 @@ static void wide_line( struct draw_stage *stage,
    /*
     * Draw wide line as a quad (two tris) by "stretching" the line along
     * X or Y.
-    * XXX For AA lines, the quad corners have to be computed in a
-    * more sophisticated way.
+    * We need to tweak coords in several ways to be conformant here.
     */
 
-   /* need to tweak coords in several ways to be conformant here */
-
    if (dx > dy) {
       /* x-major line */
       pos0[1] = pos0[1] - half_width - 0.25f;
@@ -166,6 +162,70 @@ static void wide_line( struct draw_stage *stage,
 
 
 /**
+ * Draw a wide line by drawing a quad, using geometry which will
+ * fullfill GL's antialiased line requirements.
+ */
+static void wide_line_aa(struct draw_stage *stage,
+                         struct prim_header *header)
+{
+   const struct wide_stage *wide = wide_stage(stage);
+   const float half_width = wide->half_line_width;
+   struct prim_header tri;
+   struct vertex_header *v[4];
+   float *pos;
+   float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0];
+   float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1];
+   const float len = sqrt(dx * dx + dy * dy);
+   uint i;
+
+   dx = dx * half_width / len;
+   dy = dy * half_width / len;
+
+   /* allocate/dup new verts */
+   for (i = 0; i < 4; i++) {
+      v[i] = dup_vert(stage, header->v[i/2], i);
+   }
+
+   /*
+    * Quad for line from v0 to v1:
+    *
+    *  1                         3
+    *  +-------------------------+
+    *  |                         |
+    *  *v0                     v1*
+    *  |                         |
+    *  +-------------------------+
+    *  0                         2
+    */
+
+   pos = v[0]->data[0];
+   pos[0] += dy;
+   pos[1] -= dx;
+
+   pos = v[1]->data[0];
+   pos[0] -= dy;
+   pos[1] += dx;
+
+   pos = v[2]->data[0];
+   pos[0] += dy;
+   pos[1] -= dx;
+
+   pos = v[3]->data[0];
+   pos[0] -= dy;
+   pos[1] += dx;
+
+   tri.det = header->det;  /* only the sign matters */
+
+   tri.v[0] = v[2];  tri.v[1] = v[1];  tri.v[2] = v[0];
+   stage->next->tri( stage->next, &tri );
+
+   tri.v[0] = v[3];  tri.v[1] = v[1];  tri.v[2] = v[2];
+   stage->next->tri( stage->next, &tri );
+
+}
+
+
+/**
  * Set the vertex texcoords for sprite mode.
  * Coords may be left untouched or set to a right-side-up or upside-down
  * orientation.
@@ -319,7 +379,10 @@ static void wide_first_line( struct draw_stage *stage,
    wide->half_line_width = 0.5f * draw->rasterizer->line_width;
 
    if (draw->rasterizer->line_width != 1.0) {
-      wide->stage.line = wide_line;
+      if (draw->rasterizer->line_smooth)
+         wide->stage.line = wide_line_aa;
+      else
+         wide->stage.line = wide_line;
    }
    else {
       wide->stage.line = passthrough_line;
diff --git a/src/mesa/pipe/i915simple/i915_state.c b/src/mesa/pipe/i915simple/i915_state.c
index 950ea52d60..abd5571b88 100644
--- a/src/mesa/pipe/i915simple/i915_state.c
+++ b/src/mesa/pipe/i915simple/i915_state.c
@@ -250,6 +250,13 @@ i915_create_sampler_state(struct pipe_context *pipe,
    if (sampler->normalized_coords)
       cso->state[1] |= SS3_NORMALIZED_COORDS;
 
+   if (0) /* XXX not tested yet */
+   {
+      int minlod = (int) (16.0 * sampler->min_lod);
+      minlod = CLAMP(minlod, 0, 16 * 11);
+      cso->state[1] |= (minlod << SS3_MIN_LOD_SHIFT);
+   }
+
    {
       ubyte r = float_to_ubyte(sampler->border_color[0]);
       ubyte g = float_to_ubyte(sampler->border_color[1]);
diff --git a/src/mesa/pipe/i915simple/i915_state_sampler.c b/src/mesa/pipe/i915simple/i915_state_sampler.c
index 0dbbc5241d..9c1a5bbbd6 100644
--- a/src/mesa/pipe/i915simple/i915_state_sampler.c
+++ b/src/mesa/pipe/i915simple/i915_state_sampler.c
@@ -185,7 +185,7 @@ i915_update_texture(struct i915_context *i915, uint unit,
    const struct pipe_texture *pt = &tex->base;
    uint format, pitch;
    const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
-   const uint num_levels = pt->last_level - pt->first_level;
+   const uint num_levels = pt->last_level;
 
    assert(tex);
    assert(width);
diff --git a/src/mesa/pipe/i915simple/i915_texture.c b/src/mesa/pipe/i915simple/i915_texture.c
index 6faeab134a..6d37ae3d74 100644
--- a/src/mesa/pipe/i915simple/i915_texture.c
+++ b/src/mesa/pipe/i915simple/i915_texture.c
@@ -118,11 +118,11 @@ i945_miptree_layout_2d( struct i915_texture *tex )
    tex->pitch = pt->width[0];
 
    /* May need to adjust pitch to accomodate the placement of
-    * the 2nd mipmap.  This occurs when the alignment
+    * the 2nd mipmap level.  This occurs when the alignment
     * constraints of mipmap placement push the right edge of the
-    * 2nd mipmap out past the width of its parent.
+    * 2nd mipmap level out past the width of its parent.
     */
-   if (pt->first_level != pt->last_level) {
+   if (pt->last_level > 0) {
       unsigned mip1_width = align_int(minify(pt->width[0]), align_w)
 			+ minify(minify(pt->width[0]));
 
@@ -136,7 +136,7 @@ i945_miptree_layout_2d( struct i915_texture *tex )
    tex->pitch = align_int(tex->pitch * pt->cpp, 4) / pt->cpp;
    tex->total_height = 0;
 
-   for ( level = pt->first_level ; level <= pt->last_level ; level++ ) {
+   for (level = 0; level <= pt->last_level; level++) {
       unsigned img_height;
 
       i915_miptree_set_level_info(tex, level, 1, x, y, width, height, 1);
@@ -152,9 +152,9 @@ i945_miptree_layout_2d( struct i915_texture *tex )
        */
       tex->total_height = MAX2(tex->total_height, y + img_height);
 
-      /* Layout_below: step right after second mipmap.
+      /* Layout_below: step right after second mipmap level.
        */
-      if (level == pt->first_level + 1) {
+      if (level == 1) {
 	 x += align_int(width, align_w);
       }
       else {
@@ -204,7 +204,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex)
          tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp;
          tex->total_height = dim * 4;
 
-         for (level = pt->first_level; level <= pt->last_level; level++) {
+         for (level = 0; level <= pt->last_level; level++) {
             i915_miptree_set_level_info(tex, level, 6,
                                          0, 0,
                                          /*OLD: tex->pitch, tex->total_height,*/
@@ -219,7 +219,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex)
             unsigned y = initial_offsets[face][1] * dim;
             unsigned d = dim;
 
-            for (level = pt->first_level; level <= pt->last_level; level++) {
+            for (level = 0; level <= pt->last_level; level++) {
                i915_miptree_set_image_offset(tex, level, face, x, y);
                d >>= 1;
                x += step_offsets[face][0] * d;
@@ -240,7 +240,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex)
 
          /* XXX: hardware expects/requires 9 levels at minimum.
           */
-         for (level = pt->first_level; level <= MAX2(8, pt->last_level);
+         for (level = 0; level <= MAX2(8, pt->last_level);
               level++) {
             i915_miptree_set_level_info(tex, level, depth, 0, tex->total_height,
                                          width, height, depth);
@@ -256,7 +256,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex)
          /* Fixup depth image_offsets: 
           */
          depth = pt->depth[0];
-         for (level = pt->first_level; level <= pt->last_level; level++) {
+         for (level = 0; level <= pt->last_level; level++) {
             unsigned i;
             for (i = 0; i < depth; i++) 
                i915_miptree_set_image_offset(tex, level, i,
@@ -282,7 +282,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex)
          tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp;
          tex->total_height = 0;
 
-         for (level = pt->first_level; level <= pt->last_level; level++) {
+         for (level = 0; level <= pt->last_level; level++) {
             i915_miptree_set_level_info(tex, level, 1,
                                          0, tex->total_height,
                                          width, height, 1);
@@ -337,7 +337,7 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex)
 
          /* Set all the levels to effectively occupy the whole rectangular region. 
           */
-         for (level = pt->first_level; level <= pt->last_level; level++) {
+         for (level = 0; level <= pt->last_level; level++) {
             i915_miptree_set_level_info(tex, level, 6,
                                          0, 0,
                                          lvlWidth, lvlHeight, 1);
@@ -355,12 +355,12 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex)
                y = tex->total_height - 4;
                x = (face - 4) * 8;
             }
-            else if (dim < 4 && (face > 0 || pt->first_level > 0)) {
+            else if (dim < 4 && (face > 0)) {
                y = tex->total_height - 4;
                x = face * 8;
             }
 
-            for (level = pt->first_level; level <= pt->last_level; level++) {
+            for (level = 0; level <= pt->last_level; level++) {
                i915_miptree_set_image_offset(tex, level, face, x, y);
 
                d >>= 1;
@@ -418,7 +418,7 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex)
          pack_x_pitch = tex->pitch;
          pack_x_nr = 1;
 
-         for (level = pt->first_level; level <= pt->last_level; level++) {
+         for (level = 0; level <= pt->last_level; level++) {
             unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6;
             int x = 0;
             int y = 0;
diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.c b/src/mesa/pipe/i965simple/brw_tex_layout.c
index 405fd1f794..90561f1307 100644
--- a/src/mesa/pipe/i965simple/brw_tex_layout.c
+++ b/src/mesa/pipe/i965simple/brw_tex_layout.c
@@ -146,7 +146,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
     * constraints of mipmap placement push the right edge of the
     * 2nd mipmap out past the width of its parent.
     */
-   if (pt->first_level != pt->last_level) {
+   if (pt->last_level > 0) {
       unsigned mip1_width;
 
       if (pt->compressed) {
@@ -168,7 +168,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
    tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp;
    tex->total_height = 0;
 
-   for ( level = pt->first_level ; level <= pt->last_level ; level++ ) {
+   for (level = 0; level <= pt->last_level; level++) {
       unsigned img_height;
 
       intel_miptree_set_level_info(tex, level, 1, x, y, width,
@@ -187,7 +187,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
 
       /* Layout_below: step right after second mipmap.
        */
-      if (level == pt->first_level + 1) {
+      if (level == 1) {
 	 x += align(width, align_w);
       }
       else {
@@ -234,7 +234,7 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
       pack_x_pitch = tex->pitch;
       pack_x_nr = 1;
 
-      for ( level = pt->first_level ; level <= pt->last_level ; level++ ) {
+      for (level = 0; level <= pt->last_level; level++) {
 	 unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6;
 	 int x = 0;
 	 int y = 0;
diff --git a/src/mesa/pipe/i965simple/brw_wm_surface_state.c b/src/mesa/pipe/i965simple/brw_wm_surface_state.c
index cbb4f2efd3..d16d919bce 100644
--- a/src/mesa/pipe/i965simple/brw_wm_surface_state.c
+++ b/src/mesa/pipe/i965simple/brw_wm_surface_state.c
@@ -154,7 +154,7 @@ void brw_update_texture_surface( struct brw_context *brw,
    /* Updated in emit_reloc */
    surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer );
 
-   surf.ss2.mip_count = tObj->base.last_level - tObj->base.first_level;
+   surf.ss2.mip_count = tObj->base.last_level;
    surf.ss2.width = tObj->base.width[0] - 1;
    surf.ss2.height = tObj->base.height[0] - 1;
 
diff --git a/src/mesa/pipe/llvm/Makefile b/src/mesa/pipe/llvm/Makefile
index f655fb8340..a0494ba966 100644
--- a/src/mesa/pipe/llvm/Makefile
+++ b/src/mesa/pipe/llvm/Makefile
@@ -7,8 +7,13 @@ LIBNAME = gallivm
 
 GALLIVM_SOURCES = \
         gallivm.cpp  \
+        gallivm_cpu.cpp \
         instructions.cpp  \
-        storage.cpp
+        loweringpass.cpp \
+        tgsitollvm.cpp \
+        storage.cpp \
+        storagesoa.cpp \
+        instructionssoa.cpp
 
 INC_SOURCES = gallivm_builtins.cpp llvm_base_shader.cpp
 
diff --git a/src/mesa/pipe/llvm/gallivm.cpp b/src/mesa/pipe/llvm/gallivm.cpp
index afa1446890..b99dc6db5b 100644
--- a/src/mesa/pipe/llvm/gallivm.cpp
+++ b/src/mesa/pipe/llvm/gallivm.cpp
@@ -32,16 +32,17 @@
 #ifdef MESA_LLVM
 
 #include "gallivm.h"
+#include "gallivm_p.h"
 
 #include "instructions.h"
+#include "loweringpass.h"
 #include "storage.h"
+#include "tgsitollvm.h"
 
 #include "pipe/p_context.h"
 #include "pipe/p_shader_tokens.h"
-#include "pipe/tgsi/util/tgsi_parse.h"
+
 #include "pipe/tgsi/exec/tgsi_exec.h"
-#include "pipe/tgsi/util/tgsi_util.h"
-#include "pipe/tgsi/util/tgsi_build.h"
 #include "pipe/tgsi/util/tgsi_dump.h"
 
 #include <llvm/Module.h>
@@ -63,38 +64,20 @@
 #include <llvm/Analysis/LoopPass.h>
 #include <llvm/Target/TargetData.h>
 #include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
 
 #include <sstream>
 #include <fstream>
 #include <iostream>
 
-struct gallivm_interpolate {
-   int attrib;
-   int chan;
-   int type;
-};
-
-struct gallivm_prog {
-   llvm::Module *module;
-   void *function;
-   int   num_consts;
-   int   id;
-   enum gallivm_shader_type type;
-
-   struct gallivm_interpolate interpolators[32*4]; //FIXME: this might not be enough for some shaders
-   int   num_interp;
-};
-
-struct gallivm_cpu_engine {
-   llvm::ExecutionEngine *engine;
-};
+static int GLOBAL_ID = 0;
 
 using namespace llvm;
-#include "llvm_base_shader.cpp"
-
-static int GLOBAL_ID = 0;
 
-static inline void AddStandardCompilePasses(PassManager &PM) {
+static inline
+void AddStandardCompilePasses(PassManager &PM)
+{
+   PM.add(new LoweringPass());
    PM.add(createVerifierPass());                  // Verify that input is correct
 
    PM.add(createLowerSetJmpPass());          // Lower llvm.setjmp/.longjmp
@@ -150,721 +133,14 @@ static inline void AddStandardCompilePasses(PassManager &PM) {
    PM.add(createConstantMergePass());        // Merge dup global constants
 }
 
-static inline void
-add_interpolator(struct gallivm_prog *prog,
-                 struct gallivm_interpolate *interp)
-{
-   prog->interpolators[prog->num_interp] = *interp;
-   ++prog->num_interp;
-}
-
-static void
-translate_declaration(struct gallivm_prog *prog,
-                      llvm::Module *module,
-                      Storage *storage,
-                      struct tgsi_full_declaration *decl,
-                      struct tgsi_full_declaration *fd)
-{
-   if (decl->Declaration.File == TGSI_FILE_INPUT) {
-      unsigned first, last, mask;
-      uint interp_method;
-
-      assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE);
-
-      first = decl->u.DeclarationRange.First;
-      last = decl->u.DeclarationRange.Last;
-      mask = decl->Declaration.UsageMask;
-
-      /* Do not touch WPOS.xy */
-      if (first == 0) {
-         mask &= ~TGSI_WRITEMASK_XY;
-         if (mask == TGSI_WRITEMASK_NONE) {
-            first++;
-            if (first > last) {
-               return;
-            }
-         }
-      }
-
-      interp_method = decl->Interpolation.Interpolate;
-
-      if (mask == TGSI_WRITEMASK_XYZW) {
-         unsigned i, j;
-
-         for (i = first; i <= last; i++) {
-            for (j = 0; j < NUM_CHANNELS; j++) {
-               //interp( mach, i, j );
-               struct gallivm_interpolate interp;
-               interp.type = interp_method;
-               interp.attrib = i;
-               interp.chan = j;
-               add_interpolator(prog, &interp);
-            }
-         }
-      } else {
-         unsigned i, j;
-         for( j = 0; j < NUM_CHANNELS; j++ ) {
-            if( mask & (1 << j) ) {
-               for( i = first; i <= last; i++ ) {
-                  struct gallivm_interpolate interp;
-                  interp.type = interp_method;
-                  interp.attrib = i;
-                  interp.chan = j;
-                  add_interpolator(prog, &interp);
-               }
-            }
-         }
-      }
-   }
-}
-
-
-static void
-translate_immediate(Storage *storage,
-                    struct tgsi_full_immediate *imm)
-{
-   float vec[4];
-   int i;
-   for (i = 0; i < imm->Immediate.Size - 1; ++i) {
-      switch( imm->Immediate.DataType ) {
-      case TGSI_IMM_FLOAT32:
-         vec[i] = imm->u.ImmediateFloat32[i].Float;
-         break;
-      default:
-         assert( 0 );
-      }
-   }
-   storage->addImmediate(vec);
-}
-
-static inline llvm::Value *
-swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src,
-              Storage *storage)
-{
-   int swizzle = 0;
-   int start = 1000;
-   const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 +
-                          TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W;
-   for (int k = 0; k < 4; ++k) {
-      swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start;
-      start /= 10;
-   }
-   if (swizzle != NO_SWIZZLE) {
-      /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/
-      val = storage->shuffleVector(val, swizzle);
-   }
-   return val;
-}
-
-static void
-translate_instruction(llvm::Module *module,
-                      Storage *storage,
-                      Instructions *instr,
-                      struct tgsi_full_instruction *inst,
-                      struct tgsi_full_instruction *fi,
-                      unsigned instno)
-{
-   llvm::Value *inputs[4];
-   inputs[0] = 0;
-   inputs[1] = 0;
-   inputs[2] = 0;
-   inputs[3] = 0;
-
-   for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
-      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
-      llvm::Value *val = 0;
-      llvm::Value *indIdx = 0;
-
-      if (src->SrcRegister.Indirect) {
-         indIdx = storage->addrElement(src->SrcRegisterInd.Index);
-         indIdx = storage->extractIndex(indIdx);
-      }
-      if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
-         val = storage->constElement(src->SrcRegister.Index, indIdx);
-      } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
-         val = storage->inputElement(src->SrcRegister.Index, indIdx);
-      } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
-         val = storage->tempElement(src->SrcRegister.Index);
-      } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
-         val = storage->outputElement(src->SrcRegister.Index, indIdx);
-      } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
-         val = storage->immediateElement(src->SrcRegister.Index);
-      } else {
-         fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
-         return;
-      }
-
-      inputs[i] = swizzleVector(val, src, storage);
-   }
-
-   /*if (inputs[0])
-     instr->printVector(inputs[0]);
-     if (inputs[1])
-     instr->printVector(inputs[1]);*/
-   llvm::Value *out = 0;
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_ARL: {
-      out = instr->arl(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_MOV: {
-      out = inputs[0];
-   }
-      break;
-   case TGSI_OPCODE_LIT: {
-      out = instr->lit(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_RCP: {
-      out = instr->rcp(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_RSQ: {
-      out = instr->rsq(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_EXP:
-      break;
-   case TGSI_OPCODE_LOG:
-      break;
-   case TGSI_OPCODE_MUL: {
-      out = instr->mul(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_ADD: {
-      out = instr->add(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_DP3: {
-      out = instr->dp3(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_DP4: {
-      out = instr->dp4(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_DST: {
-      out = instr->dst(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_MIN: {
-      out = instr->min(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_MAX: {
-      out = instr->max(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_SLT: {
-      out = instr->slt(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_SGE: {
-      out = instr->sge(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_MAD: {
-      out = instr->madd(inputs[0], inputs[1], inputs[2]);
-   }
-      break;
-   case TGSI_OPCODE_SUB: {
-      out = instr->sub(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_LERP: {
-      out = instr->lerp(inputs[0], inputs[1], inputs[2]);
-   }
-      break;
-   case TGSI_OPCODE_CND:
-      break;
-   case TGSI_OPCODE_CND0:
-      break;
-   case TGSI_OPCODE_DOT2ADD:
-      break;
-   case TGSI_OPCODE_INDEX:
-      break;
-   case TGSI_OPCODE_NEGATE:
-      break;
-   case TGSI_OPCODE_FRAC: {
-      out = instr->frc(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_CLAMP:
-      break;
-   case TGSI_OPCODE_FLOOR: {
-      out = instr->floor(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_ROUND:
-      break;
-   case TGSI_OPCODE_EXPBASE2: {
-      out = instr->ex2(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_LOGBASE2: {
-      out = instr->lg2(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_POWER: {
-      out = instr->pow(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_CROSSPRODUCT: {
-      out = instr->cross(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_MULTIPLYMATRIX:
-      break;
-   case TGSI_OPCODE_ABS: {
-      out = instr->abs(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_RCC:
-      break;
-   case TGSI_OPCODE_DPH: {
-      out = instr->dph(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_COS: {
-      out = instr->cos(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_DDX:
-      break;
-   case TGSI_OPCODE_DDY:
-      break;
-   case TGSI_OPCODE_KILP: {
-      out = instr->kilp(inputs[0]);
-      storage->setKilElement(out);
-      return;
-   }
-      break;
-   case TGSI_OPCODE_PK2H:
-      break;
-   case TGSI_OPCODE_PK2US:
-      break;
-   case TGSI_OPCODE_PK4B:
-      break;
-   case TGSI_OPCODE_PK4UB:
-      break;
-   case TGSI_OPCODE_RFL:
-      break;
-   case TGSI_OPCODE_SEQ:
-      break;
-   case TGSI_OPCODE_SFL:
-      break;
-   case TGSI_OPCODE_SGT: {
-      out = instr->sgt(inputs[0], inputs[1]);
-   }
-      break;
-   case TGSI_OPCODE_SIN: {
-      out = instr->sin(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_SLE:
-      break;
-   case TGSI_OPCODE_SNE:
-      break;
-   case TGSI_OPCODE_STR:
-      break;
-   case TGSI_OPCODE_TEX:
-      break;
-   case TGSI_OPCODE_TXD:
-      break;
-   case TGSI_OPCODE_UP2H:
-      break;
-   case TGSI_OPCODE_UP2US:
-      break;
-   case TGSI_OPCODE_UP4B:
-      break;
-   case TGSI_OPCODE_UP4UB:
-      break;
-   case TGSI_OPCODE_X2D:
-      break;
-   case TGSI_OPCODE_ARA:
-      break;
-   case TGSI_OPCODE_ARR:
-      break;
-   case TGSI_OPCODE_BRA:
-      break;
-   case TGSI_OPCODE_CAL: {
-      instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr());
-      return;
-   }
-      break;
-   case TGSI_OPCODE_RET: {
-      instr->end();
-      return;
-   }
-      break;
-   case TGSI_OPCODE_SSG:
-      break;
-   case TGSI_OPCODE_CMP: {
-      out = instr->cmp(inputs[0], inputs[1], inputs[2]);
-   }
-      break;
-   case TGSI_OPCODE_SCS: {
-      out = instr->scs(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_TXB:
-      break;
-   case TGSI_OPCODE_NRM:
-      break;
-   case TGSI_OPCODE_DIV:
-      break;
-   case TGSI_OPCODE_DP2:
-      break;
-   case TGSI_OPCODE_TXL:
-      break;
-   case TGSI_OPCODE_BRK: {
-      instr->brk();
-      return;
-   }
-      break;
-   case TGSI_OPCODE_IF: {
-      instr->ifop(inputs[0]);
-      storage->setCurrentBlock(instr->currentBlock());
-      return;  //just update the state
-   }
-      break;
-   case TGSI_OPCODE_LOOP:
-      break;
-   case TGSI_OPCODE_REP:
-      break;
-   case TGSI_OPCODE_ELSE: {
-      instr->elseop();
-      storage->setCurrentBlock(instr->currentBlock());
-      return; //only state update
-   }
-      break;
-   case TGSI_OPCODE_ENDIF: {
-      instr->endif();
-      storage->setCurrentBlock(instr->currentBlock());
-      return; //just update the state
-   }
-      break;
-   case TGSI_OPCODE_ENDLOOP:
-      break;
-   case TGSI_OPCODE_ENDREP:
-      break;
-   case TGSI_OPCODE_PUSHA:
-      break;
-   case TGSI_OPCODE_POPA:
-      break;
-   case TGSI_OPCODE_CEIL:
-      break;
-   case TGSI_OPCODE_I2F:
-      break;
-   case TGSI_OPCODE_NOT:
-      break;
-   case TGSI_OPCODE_TRUNC: {
-      out = instr->trunc(inputs[0]);
-   }
-      break;
-   case TGSI_OPCODE_SHL:
-      break;
-   case TGSI_OPCODE_SHR:
-      break;
-   case TGSI_OPCODE_AND:
-      break;
-   case TGSI_OPCODE_OR:
-      break;
-   case TGSI_OPCODE_MOD:
-      break;
-   case TGSI_OPCODE_XOR:
-      break;
-   case TGSI_OPCODE_SAD:
-      break;
-   case TGSI_OPCODE_TXF:
-      break;
-   case TGSI_OPCODE_TXQ:
-      break;
-   case TGSI_OPCODE_CONT:
-      break;
-   case TGSI_OPCODE_EMIT:
-      break;
-   case TGSI_OPCODE_ENDPRIM:
-      break;
-   case TGSI_OPCODE_BGNLOOP2: {
-      instr->beginLoop();
-      storage->setCurrentBlock(instr->currentBlock());
-      return;
-   }
-      break;
-   case TGSI_OPCODE_BGNSUB: {
-      instr->bgnSub(instno);
-      storage->setCurrentBlock(instr->currentBlock());
-      storage->pushTemps();
-      return;
-   }
-      break;
-   case TGSI_OPCODE_ENDLOOP2: {
-      instr->endLoop();
-      storage->setCurrentBlock(instr->currentBlock());
-      return;
-   }
-      break;
-   case TGSI_OPCODE_ENDSUB: {
-      instr->endSub();
-      storage->setCurrentBlock(instr->currentBlock());
-      storage->popArguments();
-      storage->popTemps();
-      return;
-   }
-      break;
-   case TGSI_OPCODE_NOISE1:
-      break;
-   case TGSI_OPCODE_NOISE2:
-      break;
-   case TGSI_OPCODE_NOISE3:
-      break;
-   case TGSI_OPCODE_NOISE4:
-      break;
-   case TGSI_OPCODE_NOP:
-      break;
-   case TGSI_OPCODE_TEXBEM:
-      break;
-   case TGSI_OPCODE_TEXBEML:
-      break;
-   case TGSI_OPCODE_TEXREG2AR:
-      break;
-   case TGSI_OPCODE_TEXM3X2PAD:
-      break;
-   case TGSI_OPCODE_TEXM3X2TEX:
-      break;
-   case TGSI_OPCODE_TEXM3X3PAD:
-      break;
-   case TGSI_OPCODE_TEXM3X3TEX:
-      break;
-   case TGSI_OPCODE_TEXM3X3SPEC:
-      break;
-   case TGSI_OPCODE_TEXM3X3VSPEC:
-      break;
-   case TGSI_OPCODE_TEXREG2GB:
-      break;
-   case TGSI_OPCODE_TEXREG2RGB:
-      break;
-   case TGSI_OPCODE_TEXDP3TEX:
-      break;
-   case TGSI_OPCODE_TEXDP3:
-      break;
-   case TGSI_OPCODE_TEXM3X3:
-      break;
-   case TGSI_OPCODE_TEXM3X2DEPTH:
-      break;
-   case TGSI_OPCODE_TEXDEPTH:
-      break;
-   case TGSI_OPCODE_BEM:
-      break;
-   case TGSI_OPCODE_M4X3:
-      break;
-   case TGSI_OPCODE_M3X4:
-      break;
-   case TGSI_OPCODE_M3X3:
-      break;
-   case TGSI_OPCODE_M3X2:
-      break;
-   case TGSI_OPCODE_NRM4:
-      break;
-   case TGSI_OPCODE_CALLNZ:
-      break;
-   case TGSI_OPCODE_IFC:
-      break;
-   case TGSI_OPCODE_BREAKC:
-      break;
-   case TGSI_OPCODE_KIL:
-      break;
-   case TGSI_OPCODE_END:
-      instr->end();
-      return;
-      break;
-   default:
-      fprintf(stderr, "ERROR: Unknown opcode %d\n",
-              inst->Instruction.Opcode);
-      assert(0);
-      break;
-   }
-
-   if (!out) {
-      fprintf(stderr, "ERROR: unsupported opcode %d\n",
-              inst->Instruction.Opcode);
-      assert(!"Unsupported opcode");
-   }
-
-   /* # not sure if we need this */
-   switch( inst->Instruction.Saturate ) {
-   case TGSI_SAT_NONE:
-      break;
-   case TGSI_SAT_ZERO_ONE:
-      /*TXT( "_SAT" );*/
-      break;
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      /*TXT( "_SAT[-1,1]" );*/
-      break;
-   default:
-      assert( 0 );
-   }
-
-   /* store results  */
-   for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
-      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-
-      if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
-         storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
-      } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
-         storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
-      } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
-         storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
-      } else {
-         fprintf(stderr, "ERROR: unsupported LLVM destination!");
-         assert(!"wrong destination");
-      }
-   }
-}
-
-static llvm::Module *
-tgsi_to_llvm(struct gallivm_prog *prog, const struct tgsi_token *tokens)
-{
-   llvm::Module *mod = createBaseShader();
-   struct tgsi_parse_context parse;
-   struct tgsi_full_instruction fi;
-   struct tgsi_full_declaration fd;
-   unsigned instno = 0;
-   Function* shader = mod->getFunction("execute_shader");
-   std::ostringstream stream;
-   if (prog->type == GALLIVM_VS) {
-      stream << "vs_shader";
-   } else {
-      stream << "fs_shader";
-   }
-   stream << prog->id;
-   std::string func_name = stream.str();
-   shader->setName(func_name.c_str());
-
-   Function::arg_iterator args = shader->arg_begin();
-   Value *ptr_INPUT = args++;
-   ptr_INPUT->setName("input");
-
-   BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
-
-   tgsi_parse_init(&parse, tokens);
-
-   fi = tgsi_default_full_instruction();
-   fd = tgsi_default_full_declaration();
-   Storage storage(label_entry, ptr_INPUT);
-   Instructions instr(mod, shader, label_entry, &storage);
-   while(!tgsi_parse_end_of_tokens(&parse)) {
-      tgsi_parse_token(&parse);
-
-      switch (parse.FullToken.Token.Type) {
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         translate_declaration(prog, mod, &storage,
-                               &parse.FullToken.FullDeclaration,
-                               &fd);
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         translate_immediate(&storage,
-                             &parse.FullToken.FullImmediate);
-         break;
-
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         translate_instruction(mod, &storage, &instr,
-                               &parse.FullToken.FullInstruction,
-                               &fi, instno);
-         ++instno;
-         break;
-
-      default:
-         assert(0);
-      }
-   }
-
-   tgsi_parse_free(&parse);
-
-   prog->num_consts = storage.numConsts();
-   return mod;
-}
-
-/*!
-  Translates the TGSI tokens into LLVM format. Translated representation
-  is stored in the gallivm_prog and returned.
-  After calling this function the gallivm_prog can either be used with a custom
-  code generator to generate machine code for the GPU which the code generator
-  addresses or it can be jit compiled with gallivm_cpu_jit_compile and executed
-  with gallivm_prog_exec to run the module on the CPU.
- */
-struct gallivm_prog *
-gallivm_from_tgsi(const struct tgsi_token *tokens, enum gallivm_shader_type type)
-{
-   std::cout << "Creating llvm from: " <<std::endl;
-   ++GLOBAL_ID;
-   struct gallivm_prog *gallivm =
-      (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
-   gallivm->id = GLOBAL_ID;
-   gallivm->type = type;
-   tgsi_dump(tokens, 0);
-
-   llvm::Module *mod = tgsi_to_llvm(gallivm, tokens);
-   gallivm->module = mod;
-   gallivm_prog_dump(gallivm, 0);
-
-   /* Run optimization passes over it */
-   PassManager passes;
-   passes.add(new TargetData(mod));
-   AddStandardCompilePasses(passes);
-   passes.run(*mod);
-
-   gallivm->module = mod;
-
-   gallivm_prog_dump(gallivm, 0);
-
-   return gallivm;
-}
-
-
 void gallivm_prog_delete(struct gallivm_prog *prog)
 {
-   llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
-   delete mod;
+   delete prog->module;
    prog->module = 0;
    prog->function = 0;
    free(prog);
 }
 
-typedef void (*vertex_shader_runner)(float (*ainputs)[PIPE_MAX_SHADER_INPUTS][4],
-                                     float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
-                                     float (*aconsts)[4],
-                                     int num_vertices,
-                                     int num_inputs,
-                                     int num_attribs,
-                                     int num_consts);
-
-
-/*!
-  This function is used to execute the gallivm_prog in software. Before calling
-  this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile
-  function.
- */
-int gallivm_prog_exec(struct gallivm_prog *prog,
-                      float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
-                      float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
-                      float (*consts)[4],
-                      int num_vertices,
-                      int num_inputs,
-                      int num_attribs)
-{
-   vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
-   assert(runner);
-   runner(inputs, dests, consts, num_vertices, num_inputs,
-          num_attribs, prog->num_consts);
-
-   return 0;
-}
-
-
-
 static inline void
 constant_interpolation(float (*inputs)[16][4],
                        const struct tgsi_interp_coef *coefs,
@@ -919,40 +195,15 @@ perspective_interpolation(float (*inputs)[16][4],
    }
 }
 
-typedef int (*fragment_shader_runner)(float x, float y,
-                                      float (*dests)[16][4],
-                                      float (*inputs)[16][4],
-                                      int num_attribs,
-                                      float (*consts)[4], int num_consts,
-                                      struct tgsi_sampler *samplers);
-
-int gallivm_fragment_shader_exec(struct gallivm_prog *prog,
-                                 float fx, float fy,
-                                 float (*dests)[16][4],
-                                 float (*inputs)[16][4],
-                                 float (*consts)[4],
-                                 struct tgsi_sampler *samplers)
-{
-   fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function);
-   assert(runner);
-
-   return runner(fx, fy, dests, inputs, prog->num_interp,
-                 consts, prog->num_consts,
-                 samplers);
-}
-
-void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix)
+void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix)
 {
-   llvm::Module *mod;
-   if (!prog || !prog->module)
+   if (!ir || !ir->module)
       return;
 
-   mod = static_cast<llvm::Module*>(prog->module);
-
    if (file_prefix) {
       std::ostringstream stream;
       stream << file_prefix;
-      stream << prog->id;
+      stream << ir->id;
       stream << ".ll";
       std::string name = stream.str();
       std::ofstream out(name.c_str());
@@ -960,12 +211,12 @@ void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix)
          std::cerr<<"Can't open file : "<<stream.str()<<std::endl;;
          return;
       }
-      out << (*mod);
+      out << (*ir->module);
       out.close();
    } else {
-      const llvm::Module::FunctionListType &funcs = mod->getFunctionList();
+      const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList();
       llvm::Module::FunctionListType::const_iterator itr;
-      std::cout<<"; ---------- Start shader "<<prog->id<<std::endl;
+      std::cout<<"; ---------- Start shader "<<ir->id<<std::endl;
       for (itr = funcs.begin(); itr != funcs.end(); ++itr) {
          const llvm::Function &func = (*itr);
          std::string name = func.getName();
@@ -978,86 +229,10 @@ void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix)
             std::cout<<*found<<std::endl;
          }
       }
-      std::cout<<"; ---------- End shader "<<prog->id<<std::endl;
-   }
-}
-
-
-static struct gallivm_cpu_engine *CPU = 0;
-
-static inline llvm::Function *func_for_shader(struct gallivm_prog *prog)
-{
-   llvm::Module *mod = prog->module;
-   llvm::Function *func = 0;
-
-   switch (prog->type) {
-   case GALLIVM_VS:
-      func = mod->getFunction("run_vertex_shader");
-      break;
-   case GALLIVM_FS:
-      func = mod->getFunction("run_fragment_shader");
-      break;
-   default:
-      assert(!"Unknown shader type!");
-      break;
+      std::cout<<"; ---------- End shader "<<ir->id<<std::endl;
    }
-   return func;
 }
 
-/*!
-  This function creates a CPU based execution engine for the given gallivm_prog.
-  gallivm_cpu_engine should be used as a singleton throughout the library. Before
-  executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile.
-  The gallivm_prog instance which is being passed to the constructor is being
-  automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile
-  with it again.
- */
-struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog)
-{
-   struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *)
-                                    calloc(1, sizeof(struct gallivm_cpu_engine));
-   llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
-   llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
-   llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false);
-   ee->DisableLazyCompilation();
-   cpu->engine = ee;
-
-   llvm::Function *func = func_for_shader(prog);
-
-   prog->function = ee->getPointerToFunction(func);
-   CPU = cpu;
-   return cpu;
-}
-
-
-/*!
-  This function JIT compiles the given gallivm_prog with the given cpu based execution engine.
-  The reference to the generated machine code entry point will be stored
-  in the gallivm_prog program. After executing this function one can call gallivm_prog_exec
-  in order to execute the gallivm_prog on the CPU.
- */
-void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog)
-{
-   llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
-   llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
-   llvm::ExecutionEngine *ee = cpu->engine;
-   assert(ee);
-   ee->DisableLazyCompilation();
-   ee->addModuleProvider(mp);
-
-   llvm::Function *func = func_for_shader(prog);
-   prog->function = ee->getPointerToFunction(func);
-}
-
-void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu)
-{
-   free(cpu);
-}
-
-struct gallivm_cpu_engine * gallivm_global_cpu_engine()
-{
-   return CPU;
-}
 
 void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
                                      float (*inputs)[16][4],
@@ -1084,9 +259,66 @@ void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
    }
 }
 
-#endif /* MESA_LLVM */
 
+struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type)
+{
+   struct gallivm_ir *ir =
+      (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir));
+   ++GLOBAL_ID;
+   ir->id   = GLOBAL_ID;
+   ir->type = type;
+
+   return ir;
+}
+
+void gallivm_ir_set_layout(struct gallivm_ir *ir,
+                           enum gallivm_vector_layout layout)
+{
+   ir->layout = layout;
+}
+
+void gallivm_ir_set_components(struct gallivm_ir *ir, int num)
+{
+   ir->num_components = num;
+}
 
+void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
+                               const struct tgsi_token *tokens)
+{
+   std::cout << "Creating llvm from: " <<std::endl;
+   tgsi_dump(tokens, 0);
 
 
+   llvm::Module *irmod = tgsi_to_llvmir(ir, tokens);
 
+   llvm::Module *mod = tgsi_to_llvm(ir, tokens);
+   ir->module = mod;
+   gallivm_ir_dump(ir, 0);
+}
+
+void gallivm_ir_delete(struct gallivm_ir *ir)
+{
+   delete ir->module;
+   free(ir);
+}
+
+struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir)
+{
+   struct gallivm_prog *prog =
+      (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
+   llvm::Module *mod = llvm::CloneModule(ir->module);
+   prog->num_consts = ir->num_consts;
+   memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators));
+   prog->num_interp = ir->num_interp;
+
+   /* Run optimization passes over it */
+   PassManager passes;
+   passes.add(new TargetData(mod));
+   AddStandardCompilePasses(passes);
+   passes.run(*mod);
+   prog->module = mod;
+
+   return prog;
+}
+
+#endif /* MESA_LLVM */
diff --git a/src/mesa/pipe/llvm/gallivm.h b/src/mesa/pipe/llvm/gallivm.h
index 4695de3127..b104520cb7 100644
--- a/src/mesa/pipe/llvm/gallivm.h
+++ b/src/mesa/pipe/llvm/gallivm.h
@@ -43,31 +43,34 @@ extern "C" {
 
 struct tgsi_token;
 
+struct gallivm_ir;
 struct gallivm_prog;
 struct gallivm_cpu_engine;
 struct tgsi_interp_coef;
 struct tgsi_sampler;
+struct tgsi_exec_vector;
 
 enum gallivm_shader_type {
    GALLIVM_VS,
    GALLIVM_FS
 };
 
-struct gallivm_prog *gallivm_from_tgsi(const struct tgsi_token *tokens, enum gallivm_shader_type type);
-void gallivm_prog_delete(struct gallivm_prog *prog);
-int gallivm_prog_exec(struct gallivm_prog *prog,
-                      float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
-                      float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
-                      float (*consts)[4],
-                      int num_vertices,
-                      int num_inputs,
-                      int num_attribs);
-int gallivm_fragment_shader_exec(struct gallivm_prog *prog,
-                                 float x, float y,
-                                 float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
-                                 float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
-                                 float (*consts)[4],
-                                 struct tgsi_sampler *samplers);
+enum gallivm_vector_layout {
+   GALLIVM_AOS,
+   GALLIVM_SOA
+};
+
+struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type);
+void               gallivm_ir_set_layout(struct gallivm_ir *ir,
+                                         enum gallivm_vector_layout layout);
+void               gallivm_ir_set_components(struct gallivm_ir *ir, int num);
+void               gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
+                                             const struct tgsi_token *tokens);
+void               gallivm_ir_delete(struct gallivm_ir *ir);
+
+
+struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir);
+
 void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
                                      float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
                                      const struct tgsi_interp_coef *coefs);
@@ -76,9 +79,20 @@ void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix);
 
 struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog);
 struct gallivm_cpu_engine *gallivm_global_cpu_engine();
+int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
+                        struct tgsi_exec_vector       *inputs,
+                        struct tgsi_exec_vector       *dests,
+                        float (*consts)[4]);
+int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
+                        float x, float y,
+                        float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
+                        float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
+                        float (*consts)[4],
+                        struct tgsi_sampler *samplers);
 void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog);
 void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee);
 
+
 #endif /* MESA_LLVM */
 
 #if defined __cplusplus
diff --git a/src/mesa/pipe/llvm/gallivm_cpu.cpp b/src/mesa/pipe/llvm/gallivm_cpu.cpp
new file mode 100644
index 0000000000..5f1268bf4f
--- /dev/null
+++ b/src/mesa/pipe/llvm/gallivm_cpu.cpp
@@ -0,0 +1,204 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Zack Rusin zack@tungstengraphics.com
+  */
+#ifdef MESA_LLVM
+
+#include "gallivm.h"
+#include "gallivm_p.h"
+
+#include "instructions.h"
+#include "loweringpass.h"
+#include "storage.h"
+#include "tgsitollvm.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "pipe/tgsi/exec/tgsi_exec.h"
+#include "pipe/tgsi/util/tgsi_dump.h"
+
+#include <llvm/Module.h>
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
+#include <llvm/ParameterAttributes.h>
+#include <llvm/Support/PatternMatch.h>
+#include <llvm/ExecutionEngine/JIT.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+struct gallivm_cpu_engine {
+   llvm::ExecutionEngine *engine;
+};
+
+static struct gallivm_cpu_engine *CPU = 0;
+
+typedef int (*fragment_shader_runner)(float x, float y,
+                                      float (*dests)[16][4],
+                                      float (*inputs)[16][4],
+                                      int num_attribs,
+                                      float (*consts)[4], int num_consts,
+                                      struct tgsi_sampler *samplers);
+
+int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
+                        float fx, float fy,
+                        float (*dests)[16][4],
+                        float (*inputs)[16][4],
+                        float (*consts)[4],
+                        struct tgsi_sampler *samplers)
+{
+   fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function);
+   assert(runner);
+
+   return runner(fx, fy, dests, inputs, prog->num_interp,
+                 consts, prog->num_consts,
+                 samplers);
+}
+
+static inline llvm::Function *func_for_shader(struct gallivm_prog *prog)
+{
+   llvm::Module *mod = prog->module;
+   llvm::Function *func = 0;
+
+   switch (prog->type) {
+   case GALLIVM_VS:
+      func = mod->getFunction("run_vertex_shader");
+      break;
+   case GALLIVM_FS:
+      func = mod->getFunction("run_fragment_shader");
+      break;
+   default:
+      assert(!"Unknown shader type!");
+      break;
+   }
+   return func;
+}
+
+/*!
+  This function creates a CPU based execution engine for the given gallivm_prog.
+  gallivm_cpu_engine should be used as a singleton throughout the library. Before
+  executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile.
+  The gallivm_prog instance which is being passed to the constructor is being
+  automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile
+  with it again.
+ */
+struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog)
+{
+   struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *)
+                                    calloc(1, sizeof(struct gallivm_cpu_engine));
+   llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
+   llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
+   llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false);
+   ee->DisableLazyCompilation();
+   cpu->engine = ee;
+
+   llvm::Function *func = func_for_shader(prog);
+
+   prog->function = ee->getPointerToFunction(func);
+   CPU = cpu;
+   return cpu;
+}
+
+
+/*!
+  This function JIT compiles the given gallivm_prog with the given cpu based execution engine.
+  The reference to the generated machine code entry point will be stored
+  in the gallivm_prog program. After executing this function one can call gallivm_prog_exec
+  in order to execute the gallivm_prog on the CPU.
+ */
+void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog)
+{
+   llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
+   llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
+   llvm::ExecutionEngine *ee = cpu->engine;
+   assert(ee);
+   /*FIXME : remove */
+   ee->DisableLazyCompilation();
+   ee->addModuleProvider(mp);
+
+   llvm::Function *func = func_for_shader(prog);
+   prog->function = ee->getPointerToFunction(func);
+}
+
+void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu)
+{
+   free(cpu);
+}
+
+struct gallivm_cpu_engine * gallivm_global_cpu_engine()
+{
+   return CPU;
+}
+
+
+typedef void (*vertex_shader_runner)(void *ainputs,
+                                     void *dests,
+                                     float (*aconsts)[4],
+                                     int num_vertices,
+                                     int num_inputs,
+                                     int num_attribs,
+                                     int num_consts);
+
+
+/*!
+  This function is used to execute the gallivm_prog in software. Before calling
+  this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile
+  function.
+ */
+int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
+                        struct tgsi_exec_vector       *inputs,
+                        struct tgsi_exec_vector       *dests,
+                         float (*consts)[4])
+{
+   vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
+   assert(runner);
+   /*FIXME*/
+   runner(inputs, dests, consts, 4, 4, 4, prog->num_consts);
+
+   return 0;
+}
+
+#endif
diff --git a/src/mesa/pipe/llvm/gallivm_p.h b/src/mesa/pipe/llvm/gallivm_p.h
new file mode 100644
index 0000000000..2c6e5e8f5f
--- /dev/null
+++ b/src/mesa/pipe/llvm/gallivm_p.h
@@ -0,0 +1,56 @@
+#ifndef GALLIVM_P_H
+#define GALLIVM_P_H
+
+#ifdef MESA_LLVM
+
+namespace llvm {
+   class Module;
+}
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+enum gallivm_shader_type;
+enum gallivm_vector_layout;
+
+struct gallivm_interpolate {
+   int attrib;
+   int chan;
+   int type;
+};
+
+struct gallivm_ir {
+   llvm::Module *module;
+   int id;
+   enum gallivm_shader_type type;
+   enum gallivm_vector_layout layout;
+   int num_components;
+   int   num_consts;
+
+   //FIXME: this might not be enough for some shaders
+   struct gallivm_interpolate interpolators[32*4];
+   int   num_interp;
+};
+
+struct gallivm_prog {
+   llvm::Module *module;
+   void *function;
+
+   int   id;
+   enum gallivm_shader_type type;
+
+   int   num_consts;
+
+   //FIXME: this might not be enough for some shaders
+   struct gallivm_interpolate interpolators[32*4];
+   int   num_interp;
+};
+
+#endif /* MESA_LLVM */
+
+#if defined __cplusplus
+} // extern "C"
+#endif
+
+#endif
diff --git a/src/mesa/pipe/llvm/instructionssoa.cpp b/src/mesa/pipe/llvm/instructionssoa.cpp
new file mode 100644
index 0000000000..9ac4d8fbc7
--- /dev/null
+++ b/src/mesa/pipe/llvm/instructionssoa.cpp
@@ -0,0 +1,26 @@
+#include "instructionssoa.h"
+
+InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func,
+                                 llvm::BasicBlock *block, StorageSoa *storage)
+{
+}
+
+std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1,
+                                               const std::vector<llvm::Value*> in2)
+{
+   std::vector<llvm::Value*> res(4);
+
+   return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1,
+                                               const std::vector<llvm::Value*> in2)
+{
+   std::vector<llvm::Value*> res(4);
+
+   return res;
+}
+
+void InstructionsSoa::end()
+{
+}
diff --git a/src/mesa/pipe/llvm/instructionssoa.h b/src/mesa/pipe/llvm/instructionssoa.h
new file mode 100644
index 0000000000..0b6b41cf05
--- /dev/null
+++ b/src/mesa/pipe/llvm/instructionssoa.h
@@ -0,0 +1,55 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef INSTRUCTIONSSOA_H
+#define INSTRUCTIONSSOA_H
+
+#include <vector>
+
+namespace llvm {
+   class Module;
+   class Function;
+   class BasicBlock;
+   class Value;
+}
+class StorageSoa;
+
+class InstructionsSoa
+{
+public:
+   InstructionsSoa(llvm::Module *mod, llvm::Function *func,
+                   llvm::BasicBlock *block, StorageSoa *storage);
+
+   std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1,
+                                 const std::vector<llvm::Value*> in2);
+   std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1,
+                                 const std::vector<llvm::Value*> in2);
+   void         end();
+};
+
+
+#endif
diff --git a/src/mesa/pipe/llvm/llvm_base_shader.cpp b/src/mesa/pipe/llvm/llvm_base_shader.cpp
index b574b550ae..90a25a440a 100644
--- a/src/mesa/pipe/llvm/llvm_base_shader.cpp
+++ b/src/mesa/pipe/llvm/llvm_base_shader.cpp
@@ -104,8 +104,8 @@ Module* createBaseShader() {
     /*isVarArg=*/false);
   
   std::vector<const Type*>FuncTy_18_args;
-  FuncTy_18_args.push_back(PointerTy_12);
-  FuncTy_18_args.push_back(PointerTy_12);
+  FuncTy_18_args.push_back(PointerTy_9);
+  FuncTy_18_args.push_back(PointerTy_9);
   FuncTy_18_args.push_back(PointerTy_7);
   FuncTy_18_args.push_back(IntegerType::get(32));
   FuncTy_18_args.push_back(IntegerType::get(32));
@@ -526,14 +526,23 @@ Module* createBaseShader() {
     BasicBlock* label_forbody_preheader_i = new BasicBlock("forbody.preheader.i",func_run_vertex_shader,0);
     BasicBlock* label_forbody_i = new BasicBlock("forbody.i",func_run_vertex_shader,0);
     BasicBlock* label_from_consts_exit = new BasicBlock("from_consts.exit",func_run_vertex_shader,0);
-    BasicBlock* label_forbody_preheader_91 = new BasicBlock("forbody.preheader",func_run_vertex_shader,0);
-    BasicBlock* label_forbody_92 = new BasicBlock("forbody",func_run_vertex_shader,0);
-    BasicBlock* label_afterfor_93 = new BasicBlock("afterfor",func_run_vertex_shader,0);
     
     // Block entry (label_entry_90)
     AllocaInst* ptr_consts = new AllocaInst(ArrayTy_20, "consts", label_entry_90);
     AllocaInst* ptr_temps = new AllocaInst(ArrayTy_22, "temps", label_entry_90);
     AllocaInst* ptr_args = new AllocaInst(StructTy_struct_ShaderInput, "args", label_entry_90);
+    std::vector<Value*> ptr_tmp_indices;
+    ptr_tmp_indices.push_back(const_int32_29);
+    ptr_tmp_indices.push_back(const_int32_29);
+    Instruction* ptr_tmp = new GetElementPtrInst(ptr_args, ptr_tmp_indices.begin(), ptr_tmp_indices.end(), "tmp", label_entry_90);
+    CastInst* ptr_conv = new BitCastInst(ptr_results, PointerTy_0, "conv", label_entry_90);
+    StoreInst* void_91 = new StoreInst(ptr_conv, ptr_tmp, false, label_entry_90);
+    std::vector<Value*> ptr_tmp2_indices;
+    ptr_tmp2_indices.push_back(const_int32_29);
+    ptr_tmp2_indices.push_back(const_int32_31);
+    Instruction* ptr_tmp2 = new GetElementPtrInst(ptr_args, ptr_tmp2_indices.begin(), ptr_tmp2_indices.end(), "tmp2", label_entry_90);
+    CastInst* ptr_conv4 = new BitCastInst(ptr_inputs, PointerTy_0, "conv4", label_entry_90);
+    StoreInst* void_92 = new StoreInst(ptr_conv4, ptr_tmp2, false, label_entry_90);
     ICmpInst* int1_cmp_i = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts, const_int32_29, "cmp.i", label_entry_90);
     new BranchInst(label_forbody_preheader_i, label_from_consts_exit, int1_cmp_i, label_entry_90);
     
@@ -544,17 +553,17 @@ Module* createBaseShader() {
     new BranchInst(label_forbody_i, label_forbody_preheader_i);
     
     // Block forbody.i (label_forbody_i)
-    Argument* fwdref_96 = new Argument(IntegerType::get(32));
+    Argument* fwdref_95 = new Argument(IntegerType::get(32));
     PHINode* int32_i_0_reg2mem_0_i = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i", label_forbody_i);
     int32_i_0_reg2mem_0_i->reserveOperandSpace(2);
     int32_i_0_reg2mem_0_i->addIncoming(const_int32_29, label_forbody_preheader_i);
-    int32_i_0_reg2mem_0_i->addIncoming(fwdref_96, label_forbody_i);
+    int32_i_0_reg2mem_0_i->addIncoming(fwdref_95, label_forbody_i);
     
-    Argument* fwdref_97 = new Argument(VectorTy_1);
+    Argument* fwdref_96 = new Argument(VectorTy_1);
     PHINode* packed_vec_0_reg2mem_0_i = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody_i);
     packed_vec_0_reg2mem_0_i->reserveOperandSpace(2);
     packed_vec_0_reg2mem_0_i->addIncoming(const_packed_32, label_forbody_preheader_i);
-    packed_vec_0_reg2mem_0_i->addIncoming(fwdref_97, label_forbody_i);
+    packed_vec_0_reg2mem_0_i->addIncoming(fwdref_96, label_forbody_i);
     
     std::vector<Value*> ptr_arraydecay_i_indices;
     ptr_arraydecay_i_indices.push_back(int32_i_0_reg2mem_0_i);
@@ -584,80 +593,40 @@ Module* createBaseShader() {
     ptr_arrayidx34_i_indices.push_back(const_int32_29);
     ptr_arrayidx34_i_indices.push_back(int32_i_0_reg2mem_0_i);
     Instruction* ptr_arrayidx34_i = new GetElementPtrInst(ptr_consts, ptr_arrayidx34_i_indices.begin(), ptr_arrayidx34_i_indices.end(), "arrayidx34.i", label_forbody_i);
-    StoreInst* void_98 = new StoreInst(packed_tmp31_i, ptr_arrayidx34_i, false, label_forbody_i);
-    BinaryOperator* int32_indvar_next8 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i, const_int32_31, "indvar.next8", label_forbody_i);
-    ICmpInst* int1_exitcond9 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next8, int32_tmp10_i, "exitcond9", label_forbody_i);
-    new BranchInst(label_from_consts_exit, label_forbody_i, int1_exitcond9, label_forbody_i);
+    StoreInst* void_97 = new StoreInst(packed_tmp31_i, ptr_arrayidx34_i, false, label_forbody_i);
+    BinaryOperator* int32_indvar_next_98 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i, const_int32_31, "indvar.next", label_forbody_i);
+    ICmpInst* int1_exitcond_99 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next_98, int32_tmp10_i, "exitcond", label_forbody_i);
+    new BranchInst(label_from_consts_exit, label_forbody_i, int1_exitcond_99, label_forbody_i);
     
     // Block from_consts.exit (label_from_consts_exit)
-    std::vector<Value*> ptr_tmp2_indices;
-    ptr_tmp2_indices.push_back(const_int32_29);
-    ptr_tmp2_indices.push_back(const_int32_34);
-    Instruction* ptr_tmp2 = new GetElementPtrInst(ptr_args, ptr_tmp2_indices.begin(), ptr_tmp2_indices.end(), "tmp2", label_from_consts_exit);
-    std::vector<Value*> ptr_arraydecay3_indices;
-    ptr_arraydecay3_indices.push_back(const_int32_29);
-    ptr_arraydecay3_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay3 = new GetElementPtrInst(ptr_consts, ptr_arraydecay3_indices.begin(), ptr_arraydecay3_indices.end(), "arraydecay3", label_from_consts_exit);
-    StoreInst* void_100 = new StoreInst(ptr_arraydecay3, ptr_tmp2, false, label_from_consts_exit);
-    std::vector<Value*> ptr_tmp4_indices;
-    ptr_tmp4_indices.push_back(const_int32_29);
-    ptr_tmp4_indices.push_back(const_int32_33);
-    Instruction* ptr_tmp4 = new GetElementPtrInst(ptr_args, ptr_tmp4_indices.begin(), ptr_tmp4_indices.end(), "tmp4", label_from_consts_exit);
-    std::vector<Value*> ptr_arraydecay5_indices;
-    ptr_arraydecay5_indices.push_back(const_int32_29);
-    ptr_arraydecay5_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay5 = new GetElementPtrInst(ptr_temps, ptr_arraydecay5_indices.begin(), ptr_arraydecay5_indices.end(), "arraydecay5", label_from_consts_exit);
-    StoreInst* void_101 = new StoreInst(ptr_arraydecay5, ptr_tmp4, false, label_from_consts_exit);
-    ICmpInst* int1_cmp_102 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_vertices, const_int32_29, "cmp", label_from_consts_exit);
-    new BranchInst(label_forbody_preheader_91, label_afterfor_93, int1_cmp_102, label_from_consts_exit);
-    
-    // Block forbody.preheader (label_forbody_preheader_91)
-    std::vector<Value*> ptr_tmp8_indices;
-    ptr_tmp8_indices.push_back(const_int32_29);
-    ptr_tmp8_indices.push_back(const_int32_29);
-    Instruction* ptr_tmp8 = new GetElementPtrInst(ptr_args, ptr_tmp8_indices.begin(), ptr_tmp8_indices.end(), "tmp8", label_forbody_preheader_91);
-    std::vector<Value*> ptr_tmp12_indices;
-    ptr_tmp12_indices.push_back(const_int32_29);
-    ptr_tmp12_indices.push_back(const_int32_31);
-    Instruction* ptr_tmp12 = new GetElementPtrInst(ptr_args, ptr_tmp12_indices.begin(), ptr_tmp12_indices.end(), "tmp12", label_forbody_preheader_91);
-    BinaryOperator* int32_tmp_104 = BinaryOperator::create(Instruction::Add, int32_num_vertices, const_int32_30, "tmp", label_forbody_preheader_91);
-    ICmpInst* int1_tmp6 = new ICmpInst(ICmpInst::ICMP_SLT, int32_tmp_104, const_int32_29, "tmp6", label_forbody_preheader_91);
-    SelectInst* int32_tmp7 = new SelectInst(int1_tmp6, const_int32_31, int32_num_vertices, "tmp7", label_forbody_preheader_91);
-    new BranchInst(label_forbody_92, label_forbody_preheader_91);
-    
-    // Block forbody (label_forbody_92)
-    Argument* fwdref_107 = new Argument(IntegerType::get(32));
-    PHINode* int32_i_0_reg2mem_0_106 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody_92);
-    int32_i_0_reg2mem_0_106->reserveOperandSpace(2);
-    int32_i_0_reg2mem_0_106->addIncoming(const_int32_29, label_forbody_preheader_91);
-    int32_i_0_reg2mem_0_106->addIncoming(fwdref_107, label_forbody_92);
-    
-    std::vector<Value*> ptr_arraydecay11_108_indices;
-    ptr_arraydecay11_108_indices.push_back(int32_i_0_reg2mem_0_106);
-    ptr_arraydecay11_108_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay11_108 = new GetElementPtrInst(ptr_results, ptr_arraydecay11_108_indices.begin(), ptr_arraydecay11_108_indices.end(), "arraydecay11", label_forbody_92);
-    StoreInst* void_109 = new StoreInst(ptr_arraydecay11_108, ptr_tmp8, false, label_forbody_92);
-    std::vector<Value*> ptr_arraydecay16_indices;
-    ptr_arraydecay16_indices.push_back(int32_i_0_reg2mem_0_106);
-    ptr_arraydecay16_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay16 = new GetElementPtrInst(ptr_inputs, ptr_arraydecay16_indices.begin(), ptr_arraydecay16_indices.end(), "arraydecay16", label_forbody_92);
-    StoreInst* void_110 = new StoreInst(ptr_arraydecay16, ptr_tmp12, false, label_forbody_92);
-    CallInst* void_111 = new CallInst(func_execute_shader, ptr_args, "", label_forbody_92);
-    void_111->setCallingConv(CallingConv::C);
-    void_111->setTailCall(false);const ParamAttrsList *void_111_PAL = 0;
-    void_111->setParamAttrs(void_111_PAL);
-    
-    BinaryOperator* int32_indvar_next_112 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_106, const_int32_31, "indvar.next", label_forbody_92);
-    ICmpInst* int1_exitcond_113 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next_112, int32_tmp7, "exitcond", label_forbody_92);
-    new BranchInst(label_afterfor_93, label_forbody_92, int1_exitcond_113, label_forbody_92);
-    
-    // Block afterfor (label_afterfor_93)
-    new ReturnInst(label_afterfor_93);
+    std::vector<Value*> ptr_tmp7_indices;
+    ptr_tmp7_indices.push_back(const_int32_29);
+    ptr_tmp7_indices.push_back(const_int32_34);
+    Instruction* ptr_tmp7 = new GetElementPtrInst(ptr_args, ptr_tmp7_indices.begin(), ptr_tmp7_indices.end(), "tmp7", label_from_consts_exit);
+    std::vector<Value*> ptr_arraydecay8_indices;
+    ptr_arraydecay8_indices.push_back(const_int32_29);
+    ptr_arraydecay8_indices.push_back(const_int32_29);
+    Instruction* ptr_arraydecay8 = new GetElementPtrInst(ptr_consts, ptr_arraydecay8_indices.begin(), ptr_arraydecay8_indices.end(), "arraydecay8", label_from_consts_exit);
+    StoreInst* void_101 = new StoreInst(ptr_arraydecay8, ptr_tmp7, false, label_from_consts_exit);
+    std::vector<Value*> ptr_tmp9_indices;
+    ptr_tmp9_indices.push_back(const_int32_29);
+    ptr_tmp9_indices.push_back(const_int32_33);
+    Instruction* ptr_tmp9 = new GetElementPtrInst(ptr_args, ptr_tmp9_indices.begin(), ptr_tmp9_indices.end(), "tmp9", label_from_consts_exit);
+    std::vector<Value*> ptr_arraydecay10_indices;
+    ptr_arraydecay10_indices.push_back(const_int32_29);
+    ptr_arraydecay10_indices.push_back(const_int32_29);
+    Instruction* ptr_arraydecay10 = new GetElementPtrInst(ptr_temps, ptr_arraydecay10_indices.begin(), ptr_arraydecay10_indices.end(), "arraydecay10", label_from_consts_exit);
+    StoreInst* void_102 = new StoreInst(ptr_arraydecay10, ptr_tmp9, false, label_from_consts_exit);
+    CallInst* void_103 = new CallInst(func_execute_shader, ptr_args, "", label_from_consts_exit);
+    void_103->setCallingConv(CallingConv::C);
+    void_103->setTailCall(false);const ParamAttrsList *void_103_PAL = 0;
+    void_103->setParamAttrs(void_103_PAL);
+    
+    new ReturnInst(label_from_consts_exit);
     
     // Resolve Forward References
-    fwdref_107->replaceAllUsesWith(int32_indvar_next_112); delete fwdref_107;
-    fwdref_97->replaceAllUsesWith(packed_tmp31_i); delete fwdref_97;
-    fwdref_96->replaceAllUsesWith(int32_indvar_next8); delete fwdref_96;
+    fwdref_96->replaceAllUsesWith(packed_tmp31_i); delete fwdref_96;
+    fwdref_95->replaceAllUsesWith(int32_indvar_next_98); delete fwdref_95;
     
   }
   
@@ -668,195 +637,195 @@ Module* createBaseShader() {
     float_x->setName("x");
     Value* float_y = args++;
     float_y->setName("y");
-    Value* ptr_results_116 = args++;
-    ptr_results_116->setName("results");
-    Value* ptr_inputs_117 = args++;
-    ptr_inputs_117->setName("inputs");
-    Value* int32_num_inputs_118 = args++;
-    int32_num_inputs_118->setName("num_inputs");
-    Value* ptr_aconsts_119 = args++;
-    ptr_aconsts_119->setName("aconsts");
-    Value* int32_num_consts_120 = args++;
-    int32_num_consts_120->setName("num_consts");
+    Value* ptr_results_105 = args++;
+    ptr_results_105->setName("results");
+    Value* ptr_inputs_106 = args++;
+    ptr_inputs_106->setName("inputs");
+    Value* int32_num_inputs_107 = args++;
+    int32_num_inputs_107->setName("num_inputs");
+    Value* ptr_aconsts_108 = args++;
+    ptr_aconsts_108->setName("aconsts");
+    Value* int32_num_consts_109 = args++;
+    int32_num_consts_109->setName("num_consts");
     Value* ptr_samplers = args++;
     ptr_samplers->setName("samplers");
     
-    BasicBlock* label_entry_121 = new BasicBlock("entry",func_run_fragment_shader,0);
-    BasicBlock* label_forbody_preheader_i_122 = new BasicBlock("forbody.preheader.i",func_run_fragment_shader,0);
-    BasicBlock* label_forbody_i_123 = new BasicBlock("forbody.i",func_run_fragment_shader,0);
-    BasicBlock* label_from_consts_exit_124 = new BasicBlock("from_consts.exit",func_run_fragment_shader,0);
-    
-    // Block entry (label_entry_121)
-    AllocaInst* ptr_consts_125 = new AllocaInst(ArrayTy_20, "consts", label_entry_121);
-    AllocaInst* ptr_temps_126 = new AllocaInst(ArrayTy_22, "temps", label_entry_121);
-    AllocaInst* ptr_args_127 = new AllocaInst(StructTy_struct_ShaderInput, "args", label_entry_121);
-    std::vector<Value*> ptr_tmp_indices;
-    ptr_tmp_indices.push_back(const_int32_29);
-    ptr_tmp_indices.push_back(const_int32_35);
-    Instruction* ptr_tmp = new GetElementPtrInst(ptr_args_127, ptr_tmp_indices.begin(), ptr_tmp_indices.end(), "tmp", label_entry_121);
-    StoreInst* void_128 = new StoreInst(const_int32_29, ptr_tmp, false, label_entry_121);
-    ICmpInst* int1_cmp_i_129 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts_120, const_int32_29, "cmp.i", label_entry_121);
-    new BranchInst(label_forbody_preheader_i_122, label_from_consts_exit_124, int1_cmp_i_129, label_entry_121);
-    
-    // Block forbody.preheader.i (label_forbody_preheader_i_122)
-    BinaryOperator* int32_tmp_i_131 = BinaryOperator::create(Instruction::Add, int32_num_consts_120, const_int32_30, "tmp.i", label_forbody_preheader_i_122);
-    ICmpInst* int1_tmp9_i_132 = new ICmpInst(ICmpInst::ICMP_SLT, int32_tmp_i_131, const_int32_29, "tmp9.i", label_forbody_preheader_i_122);
-    SelectInst* int32_tmp10_i_133 = new SelectInst(int1_tmp9_i_132, const_int32_31, int32_num_consts_120, "tmp10.i", label_forbody_preheader_i_122);
-    new BranchInst(label_forbody_i_123, label_forbody_preheader_i_122);
-    
-    // Block forbody.i (label_forbody_i_123)
-    Argument* fwdref_136 = new Argument(IntegerType::get(32));
-    PHINode* int32_i_0_reg2mem_0_i_135 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i", label_forbody_i_123);
-    int32_i_0_reg2mem_0_i_135->reserveOperandSpace(2);
-    int32_i_0_reg2mem_0_i_135->addIncoming(const_int32_29, label_forbody_preheader_i_122);
-    int32_i_0_reg2mem_0_i_135->addIncoming(fwdref_136, label_forbody_i_123);
-    
-    Argument* fwdref_138 = new Argument(VectorTy_1);
-    PHINode* packed_vec_0_reg2mem_0_i_137 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody_i_123);
-    packed_vec_0_reg2mem_0_i_137->reserveOperandSpace(2);
-    packed_vec_0_reg2mem_0_i_137->addIncoming(const_packed_32, label_forbody_preheader_i_122);
-    packed_vec_0_reg2mem_0_i_137->addIncoming(fwdref_138, label_forbody_i_123);
-    
-    std::vector<Value*> ptr_arraydecay_i_139_indices;
-    ptr_arraydecay_i_139_indices.push_back(int32_i_0_reg2mem_0_i_135);
-    ptr_arraydecay_i_139_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay_i_139 = new GetElementPtrInst(ptr_aconsts_119, ptr_arraydecay_i_139_indices.begin(), ptr_arraydecay_i_139_indices.end(), "arraydecay.i", label_forbody_i_123);
-    LoadInst* float_tmp5_i_140 = new LoadInst(ptr_arraydecay_i_139, "tmp5.i", false, label_forbody_i_123);
-    InsertElementInst* packed_tmp7_i_141 = new InsertElementInst(packed_vec_0_reg2mem_0_i_137, float_tmp5_i_140, const_int32_29, "tmp7.i", label_forbody_i_123);
-    std::vector<Value*> ptr_arrayidx12_i_142_indices;
-    ptr_arrayidx12_i_142_indices.push_back(int32_i_0_reg2mem_0_i_135);
-    ptr_arrayidx12_i_142_indices.push_back(const_int32_31);
-    Instruction* ptr_arrayidx12_i_142 = new GetElementPtrInst(ptr_aconsts_119, ptr_arrayidx12_i_142_indices.begin(), ptr_arrayidx12_i_142_indices.end(), "arrayidx12.i", label_forbody_i_123);
-    LoadInst* float_tmp13_i_143 = new LoadInst(ptr_arrayidx12_i_142, "tmp13.i", false, label_forbody_i_123);
-    InsertElementInst* packed_tmp15_i_144 = new InsertElementInst(packed_tmp7_i_141, float_tmp13_i_143, const_int32_31, "tmp15.i", label_forbody_i_123);
-    std::vector<Value*> ptr_arrayidx20_i_145_indices;
-    ptr_arrayidx20_i_145_indices.push_back(int32_i_0_reg2mem_0_i_135);
-    ptr_arrayidx20_i_145_indices.push_back(const_int32_33);
-    Instruction* ptr_arrayidx20_i_145 = new GetElementPtrInst(ptr_aconsts_119, ptr_arrayidx20_i_145_indices.begin(), ptr_arrayidx20_i_145_indices.end(), "arrayidx20.i", label_forbody_i_123);
-    LoadInst* float_tmp21_i_146 = new LoadInst(ptr_arrayidx20_i_145, "tmp21.i", false, label_forbody_i_123);
-    InsertElementInst* packed_tmp23_i_147 = new InsertElementInst(packed_tmp15_i_144, float_tmp21_i_146, const_int32_33, "tmp23.i", label_forbody_i_123);
-    std::vector<Value*> ptr_arrayidx28_i_148_indices;
-    ptr_arrayidx28_i_148_indices.push_back(int32_i_0_reg2mem_0_i_135);
-    ptr_arrayidx28_i_148_indices.push_back(const_int32_34);
-    Instruction* ptr_arrayidx28_i_148 = new GetElementPtrInst(ptr_aconsts_119, ptr_arrayidx28_i_148_indices.begin(), ptr_arrayidx28_i_148_indices.end(), "arrayidx28.i", label_forbody_i_123);
-    LoadInst* float_tmp29_i_149 = new LoadInst(ptr_arrayidx28_i_148, "tmp29.i", false, label_forbody_i_123);
-    InsertElementInst* packed_tmp31_i_150 = new InsertElementInst(packed_tmp23_i_147, float_tmp29_i_149, const_int32_34, "tmp31.i", label_forbody_i_123);
-    std::vector<Value*> ptr_arrayidx34_i_151_indices;
-    ptr_arrayidx34_i_151_indices.push_back(const_int32_29);
-    ptr_arrayidx34_i_151_indices.push_back(int32_i_0_reg2mem_0_i_135);
-    Instruction* ptr_arrayidx34_i_151 = new GetElementPtrInst(ptr_consts_125, ptr_arrayidx34_i_151_indices.begin(), ptr_arrayidx34_i_151_indices.end(), "arrayidx34.i", label_forbody_i_123);
-    StoreInst* void_152 = new StoreInst(packed_tmp31_i_150, ptr_arrayidx34_i_151, false, label_forbody_i_123);
-    BinaryOperator* int32_indvar_next7 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i_135, const_int32_31, "indvar.next7", label_forbody_i_123);
-    ICmpInst* int1_exitcond8 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next7, int32_tmp10_i_133, "exitcond8", label_forbody_i_123);
-    new BranchInst(label_from_consts_exit_124, label_forbody_i_123, int1_exitcond8, label_forbody_i_123);
-    
-    // Block from_consts.exit (label_from_consts_exit_124)
+    BasicBlock* label_entry_110 = new BasicBlock("entry",func_run_fragment_shader,0);
+    BasicBlock* label_forbody_preheader_i_111 = new BasicBlock("forbody.preheader.i",func_run_fragment_shader,0);
+    BasicBlock* label_forbody_i_112 = new BasicBlock("forbody.i",func_run_fragment_shader,0);
+    BasicBlock* label_from_consts_exit_113 = new BasicBlock("from_consts.exit",func_run_fragment_shader,0);
+    
+    // Block entry (label_entry_110)
+    AllocaInst* ptr_consts_114 = new AllocaInst(ArrayTy_20, "consts", label_entry_110);
+    AllocaInst* ptr_temps_115 = new AllocaInst(ArrayTy_22, "temps", label_entry_110);
+    AllocaInst* ptr_args_116 = new AllocaInst(StructTy_struct_ShaderInput, "args", label_entry_110);
+    std::vector<Value*> ptr_tmp_117_indices;
+    ptr_tmp_117_indices.push_back(const_int32_29);
+    ptr_tmp_117_indices.push_back(const_int32_35);
+    Instruction* ptr_tmp_117 = new GetElementPtrInst(ptr_args_116, ptr_tmp_117_indices.begin(), ptr_tmp_117_indices.end(), "tmp", label_entry_110);
+    StoreInst* void_118 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_entry_110);
+    ICmpInst* int1_cmp_i_119 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts_109, const_int32_29, "cmp.i", label_entry_110);
+    new BranchInst(label_forbody_preheader_i_111, label_from_consts_exit_113, int1_cmp_i_119, label_entry_110);
+    
+    // Block forbody.preheader.i (label_forbody_preheader_i_111)
+    BinaryOperator* int32_tmp_i_121 = BinaryOperator::create(Instruction::Add, int32_num_consts_109, const_int32_30, "tmp.i", label_forbody_preheader_i_111);
+    ICmpInst* int1_tmp9_i_122 = new ICmpInst(ICmpInst::ICMP_SLT, int32_tmp_i_121, const_int32_29, "tmp9.i", label_forbody_preheader_i_111);
+    SelectInst* int32_tmp10_i_123 = new SelectInst(int1_tmp9_i_122, const_int32_31, int32_num_consts_109, "tmp10.i", label_forbody_preheader_i_111);
+    new BranchInst(label_forbody_i_112, label_forbody_preheader_i_111);
+    
+    // Block forbody.i (label_forbody_i_112)
+    Argument* fwdref_126 = new Argument(IntegerType::get(32));
+    PHINode* int32_i_0_reg2mem_0_i_125 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i", label_forbody_i_112);
+    int32_i_0_reg2mem_0_i_125->reserveOperandSpace(2);
+    int32_i_0_reg2mem_0_i_125->addIncoming(const_int32_29, label_forbody_preheader_i_111);
+    int32_i_0_reg2mem_0_i_125->addIncoming(fwdref_126, label_forbody_i_112);
+    
+    Argument* fwdref_128 = new Argument(VectorTy_1);
+    PHINode* packed_vec_0_reg2mem_0_i_127 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody_i_112);
+    packed_vec_0_reg2mem_0_i_127->reserveOperandSpace(2);
+    packed_vec_0_reg2mem_0_i_127->addIncoming(const_packed_32, label_forbody_preheader_i_111);
+    packed_vec_0_reg2mem_0_i_127->addIncoming(fwdref_128, label_forbody_i_112);
+    
+    std::vector<Value*> ptr_arraydecay_i_129_indices;
+    ptr_arraydecay_i_129_indices.push_back(int32_i_0_reg2mem_0_i_125);
+    ptr_arraydecay_i_129_indices.push_back(const_int32_29);
+    Instruction* ptr_arraydecay_i_129 = new GetElementPtrInst(ptr_aconsts_108, ptr_arraydecay_i_129_indices.begin(), ptr_arraydecay_i_129_indices.end(), "arraydecay.i", label_forbody_i_112);
+    LoadInst* float_tmp5_i_130 = new LoadInst(ptr_arraydecay_i_129, "tmp5.i", false, label_forbody_i_112);
+    InsertElementInst* packed_tmp7_i_131 = new InsertElementInst(packed_vec_0_reg2mem_0_i_127, float_tmp5_i_130, const_int32_29, "tmp7.i", label_forbody_i_112);
+    std::vector<Value*> ptr_arrayidx12_i_132_indices;
+    ptr_arrayidx12_i_132_indices.push_back(int32_i_0_reg2mem_0_i_125);
+    ptr_arrayidx12_i_132_indices.push_back(const_int32_31);
+    Instruction* ptr_arrayidx12_i_132 = new GetElementPtrInst(ptr_aconsts_108, ptr_arrayidx12_i_132_indices.begin(), ptr_arrayidx12_i_132_indices.end(), "arrayidx12.i", label_forbody_i_112);
+    LoadInst* float_tmp13_i_133 = new LoadInst(ptr_arrayidx12_i_132, "tmp13.i", false, label_forbody_i_112);
+    InsertElementInst* packed_tmp15_i_134 = new InsertElementInst(packed_tmp7_i_131, float_tmp13_i_133, const_int32_31, "tmp15.i", label_forbody_i_112);
+    std::vector<Value*> ptr_arrayidx20_i_135_indices;
+    ptr_arrayidx20_i_135_indices.push_back(int32_i_0_reg2mem_0_i_125);
+    ptr_arrayidx20_i_135_indices.push_back(const_int32_33);
+    Instruction* ptr_arrayidx20_i_135 = new GetElementPtrInst(ptr_aconsts_108, ptr_arrayidx20_i_135_indices.begin(), ptr_arrayidx20_i_135_indices.end(), "arrayidx20.i", label_forbody_i_112);
+    LoadInst* float_tmp21_i_136 = new LoadInst(ptr_arrayidx20_i_135, "tmp21.i", false, label_forbody_i_112);
+    InsertElementInst* packed_tmp23_i_137 = new InsertElementInst(packed_tmp15_i_134, float_tmp21_i_136, const_int32_33, "tmp23.i", label_forbody_i_112);
+    std::vector<Value*> ptr_arrayidx28_i_138_indices;
+    ptr_arrayidx28_i_138_indices.push_back(int32_i_0_reg2mem_0_i_125);
+    ptr_arrayidx28_i_138_indices.push_back(const_int32_34);
+    Instruction* ptr_arrayidx28_i_138 = new GetElementPtrInst(ptr_aconsts_108, ptr_arrayidx28_i_138_indices.begin(), ptr_arrayidx28_i_138_indices.end(), "arrayidx28.i", label_forbody_i_112);
+    LoadInst* float_tmp29_i_139 = new LoadInst(ptr_arrayidx28_i_138, "tmp29.i", false, label_forbody_i_112);
+    InsertElementInst* packed_tmp31_i_140 = new InsertElementInst(packed_tmp23_i_137, float_tmp29_i_139, const_int32_34, "tmp31.i", label_forbody_i_112);
+    std::vector<Value*> ptr_arrayidx34_i_141_indices;
+    ptr_arrayidx34_i_141_indices.push_back(const_int32_29);
+    ptr_arrayidx34_i_141_indices.push_back(int32_i_0_reg2mem_0_i_125);
+    Instruction* ptr_arrayidx34_i_141 = new GetElementPtrInst(ptr_consts_114, ptr_arrayidx34_i_141_indices.begin(), ptr_arrayidx34_i_141_indices.end(), "arrayidx34.i", label_forbody_i_112);
+    StoreInst* void_142 = new StoreInst(packed_tmp31_i_140, ptr_arrayidx34_i_141, false, label_forbody_i_112);
+    BinaryOperator* int32_indvar_next7 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i_125, const_int32_31, "indvar.next7", label_forbody_i_112);
+    ICmpInst* int1_exitcond8 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next7, int32_tmp10_i_123, "exitcond8", label_forbody_i_112);
+    new BranchInst(label_from_consts_exit_113, label_forbody_i_112, int1_exitcond8, label_forbody_i_112);
+    
+    // Block from_consts.exit (label_from_consts_exit_113)
     std::vector<Value*> ptr_tmp3_indices;
     ptr_tmp3_indices.push_back(const_int32_29);
     ptr_tmp3_indices.push_back(const_int32_34);
-    Instruction* ptr_tmp3 = new GetElementPtrInst(ptr_args_127, ptr_tmp3_indices.begin(), ptr_tmp3_indices.end(), "tmp3", label_from_consts_exit_124);
+    Instruction* ptr_tmp3 = new GetElementPtrInst(ptr_args_116, ptr_tmp3_indices.begin(), ptr_tmp3_indices.end(), "tmp3", label_from_consts_exit_113);
     std::vector<Value*> ptr_arraydecay4_indices;
     ptr_arraydecay4_indices.push_back(const_int32_29);
     ptr_arraydecay4_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay4 = new GetElementPtrInst(ptr_consts_125, ptr_arraydecay4_indices.begin(), ptr_arraydecay4_indices.end(), "arraydecay4", label_from_consts_exit_124);
-    StoreInst* void_154 = new StoreInst(ptr_arraydecay4, ptr_tmp3, false, label_from_consts_exit_124);
+    Instruction* ptr_arraydecay4 = new GetElementPtrInst(ptr_consts_114, ptr_arraydecay4_indices.begin(), ptr_arraydecay4_indices.end(), "arraydecay4", label_from_consts_exit_113);
+    StoreInst* void_144 = new StoreInst(ptr_arraydecay4, ptr_tmp3, false, label_from_consts_exit_113);
     std::vector<Value*> ptr_tmp5_indices;
     ptr_tmp5_indices.push_back(const_int32_29);
     ptr_tmp5_indices.push_back(const_int32_33);
-    Instruction* ptr_tmp5 = new GetElementPtrInst(ptr_args_127, ptr_tmp5_indices.begin(), ptr_tmp5_indices.end(), "tmp5", label_from_consts_exit_124);
+    Instruction* ptr_tmp5 = new GetElementPtrInst(ptr_args_116, ptr_tmp5_indices.begin(), ptr_tmp5_indices.end(), "tmp5", label_from_consts_exit_113);
     std::vector<Value*> ptr_arraydecay6_indices;
     ptr_arraydecay6_indices.push_back(const_int32_29);
     ptr_arraydecay6_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay6 = new GetElementPtrInst(ptr_temps_126, ptr_arraydecay6_indices.begin(), ptr_arraydecay6_indices.end(), "arraydecay6", label_from_consts_exit_124);
-    StoreInst* void_155 = new StoreInst(ptr_arraydecay6, ptr_tmp5, false, label_from_consts_exit_124);
-    std::vector<Value*> ptr_tmp8_156_indices;
-    ptr_tmp8_156_indices.push_back(const_int32_29);
-    ptr_tmp8_156_indices.push_back(const_int32_31);
-    Instruction* ptr_tmp8_156 = new GetElementPtrInst(ptr_args_127, ptr_tmp8_156_indices.begin(), ptr_tmp8_156_indices.end(), "tmp8", label_from_consts_exit_124);
-    std::vector<Value*> ptr_tmp12_157_indices;
-    ptr_tmp12_157_indices.push_back(const_int32_29);
-    ptr_tmp12_157_indices.push_back(const_int32_29);
-    Instruction* ptr_tmp12_157 = new GetElementPtrInst(ptr_args_127, ptr_tmp12_157_indices.begin(), ptr_tmp12_157_indices.end(), "tmp12", label_from_consts_exit_124);
-    std::vector<Value*> ptr_arraydecay11_158_indices;
-    ptr_arraydecay11_158_indices.push_back(const_int32_29);
-    ptr_arraydecay11_158_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay11_158 = new GetElementPtrInst(ptr_inputs_117, ptr_arraydecay11_158_indices.begin(), ptr_arraydecay11_158_indices.end(), "arraydecay11", label_from_consts_exit_124);
-    StoreInst* void_159 = new StoreInst(ptr_arraydecay11_158, ptr_tmp8_156, false, label_from_consts_exit_124);
-    std::vector<Value*> ptr_arraydecay16_160_indices;
-    ptr_arraydecay16_160_indices.push_back(const_int32_29);
-    ptr_arraydecay16_160_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay16_160 = new GetElementPtrInst(ptr_results_116, ptr_arraydecay16_160_indices.begin(), ptr_arraydecay16_160_indices.end(), "arraydecay16", label_from_consts_exit_124);
-    StoreInst* void_161 = new StoreInst(ptr_arraydecay16_160, ptr_tmp12_157, false, label_from_consts_exit_124);
-    StoreInst* void_162 = new StoreInst(const_int32_29, ptr_tmp, false, label_from_consts_exit_124);
-    CallInst* void_163 = new CallInst(func_execute_shader, ptr_args_127, "", label_from_consts_exit_124);
-    void_163->setCallingConv(CallingConv::C);
-    void_163->setTailCall(false);const ParamAttrsList *void_163_PAL = 0;
-    void_163->setParamAttrs(void_163_PAL);
-    
-    LoadInst* int32_tmp23 = new LoadInst(ptr_tmp, "tmp23", false, label_from_consts_exit_124);
+    Instruction* ptr_arraydecay6 = new GetElementPtrInst(ptr_temps_115, ptr_arraydecay6_indices.begin(), ptr_arraydecay6_indices.end(), "arraydecay6", label_from_consts_exit_113);
+    StoreInst* void_145 = new StoreInst(ptr_arraydecay6, ptr_tmp5, false, label_from_consts_exit_113);
+    std::vector<Value*> ptr_tmp8_indices;
+    ptr_tmp8_indices.push_back(const_int32_29);
+    ptr_tmp8_indices.push_back(const_int32_31);
+    Instruction* ptr_tmp8 = new GetElementPtrInst(ptr_args_116, ptr_tmp8_indices.begin(), ptr_tmp8_indices.end(), "tmp8", label_from_consts_exit_113);
+    std::vector<Value*> ptr_tmp12_indices;
+    ptr_tmp12_indices.push_back(const_int32_29);
+    ptr_tmp12_indices.push_back(const_int32_29);
+    Instruction* ptr_tmp12 = new GetElementPtrInst(ptr_args_116, ptr_tmp12_indices.begin(), ptr_tmp12_indices.end(), "tmp12", label_from_consts_exit_113);
+    std::vector<Value*> ptr_arraydecay11_146_indices;
+    ptr_arraydecay11_146_indices.push_back(const_int32_29);
+    ptr_arraydecay11_146_indices.push_back(const_int32_29);
+    Instruction* ptr_arraydecay11_146 = new GetElementPtrInst(ptr_inputs_106, ptr_arraydecay11_146_indices.begin(), ptr_arraydecay11_146_indices.end(), "arraydecay11", label_from_consts_exit_113);
+    StoreInst* void_147 = new StoreInst(ptr_arraydecay11_146, ptr_tmp8, false, label_from_consts_exit_113);
+    std::vector<Value*> ptr_arraydecay16_indices;
+    ptr_arraydecay16_indices.push_back(const_int32_29);
+    ptr_arraydecay16_indices.push_back(const_int32_29);
+    Instruction* ptr_arraydecay16 = new GetElementPtrInst(ptr_results_105, ptr_arraydecay16_indices.begin(), ptr_arraydecay16_indices.end(), "arraydecay16", label_from_consts_exit_113);
+    StoreInst* void_148 = new StoreInst(ptr_arraydecay16, ptr_tmp12, false, label_from_consts_exit_113);
+    StoreInst* void_149 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_from_consts_exit_113);
+    CallInst* void_150 = new CallInst(func_execute_shader, ptr_args_116, "", label_from_consts_exit_113);
+    void_150->setCallingConv(CallingConv::C);
+    void_150->setTailCall(false);const ParamAttrsList *void_150_PAL = 0;
+    void_150->setParamAttrs(void_150_PAL);
+    
+    LoadInst* int32_tmp23 = new LoadInst(ptr_tmp_117, "tmp23", false, label_from_consts_exit_113);
     std::vector<Value*> ptr_arraydecay11_1_indices;
     ptr_arraydecay11_1_indices.push_back(const_int32_31);
     ptr_arraydecay11_1_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay11_1 = new GetElementPtrInst(ptr_inputs_117, ptr_arraydecay11_1_indices.begin(), ptr_arraydecay11_1_indices.end(), "arraydecay11.1", label_from_consts_exit_124);
-    StoreInst* void_164 = new StoreInst(ptr_arraydecay11_1, ptr_tmp8_156, false, label_from_consts_exit_124);
+    Instruction* ptr_arraydecay11_1 = new GetElementPtrInst(ptr_inputs_106, ptr_arraydecay11_1_indices.begin(), ptr_arraydecay11_1_indices.end(), "arraydecay11.1", label_from_consts_exit_113);
+    StoreInst* void_151 = new StoreInst(ptr_arraydecay11_1, ptr_tmp8, false, label_from_consts_exit_113);
     std::vector<Value*> ptr_arraydecay16_1_indices;
     ptr_arraydecay16_1_indices.push_back(const_int32_31);
     ptr_arraydecay16_1_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay16_1 = new GetElementPtrInst(ptr_results_116, ptr_arraydecay16_1_indices.begin(), ptr_arraydecay16_1_indices.end(), "arraydecay16.1", label_from_consts_exit_124);
-    StoreInst* void_165 = new StoreInst(ptr_arraydecay16_1, ptr_tmp12_157, false, label_from_consts_exit_124);
-    StoreInst* void_166 = new StoreInst(const_int32_29, ptr_tmp, false, label_from_consts_exit_124);
-    CallInst* void_167 = new CallInst(func_execute_shader, ptr_args_127, "", label_from_consts_exit_124);
-    void_167->setCallingConv(CallingConv::C);
-    void_167->setTailCall(false);const ParamAttrsList *void_167_PAL = 0;
-    void_167->setParamAttrs(void_167_PAL);
-    
-    LoadInst* int32_tmp23_1 = new LoadInst(ptr_tmp, "tmp23.1", false, label_from_consts_exit_124);
-    BinaryOperator* int32_shl_1 = BinaryOperator::create(Instruction::Shl, int32_tmp23_1, const_int32_31, "shl.1", label_from_consts_exit_124);
-    BinaryOperator* int32_or_1 = BinaryOperator::create(Instruction::Or, int32_shl_1, int32_tmp23, "or.1", label_from_consts_exit_124);
+    Instruction* ptr_arraydecay16_1 = new GetElementPtrInst(ptr_results_105, ptr_arraydecay16_1_indices.begin(), ptr_arraydecay16_1_indices.end(), "arraydecay16.1", label_from_consts_exit_113);
+    StoreInst* void_152 = new StoreInst(ptr_arraydecay16_1, ptr_tmp12, false, label_from_consts_exit_113);
+    StoreInst* void_153 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_from_consts_exit_113);
+    CallInst* void_154 = new CallInst(func_execute_shader, ptr_args_116, "", label_from_consts_exit_113);
+    void_154->setCallingConv(CallingConv::C);
+    void_154->setTailCall(false);const ParamAttrsList *void_154_PAL = 0;
+    void_154->setParamAttrs(void_154_PAL);
+    
+    LoadInst* int32_tmp23_1 = new LoadInst(ptr_tmp_117, "tmp23.1", false, label_from_consts_exit_113);
+    BinaryOperator* int32_shl_1 = BinaryOperator::create(Instruction::Shl, int32_tmp23_1, const_int32_31, "shl.1", label_from_consts_exit_113);
+    BinaryOperator* int32_or_1 = BinaryOperator::create(Instruction::Or, int32_shl_1, int32_tmp23, "or.1", label_from_consts_exit_113);
     std::vector<Value*> ptr_arraydecay11_2_indices;
     ptr_arraydecay11_2_indices.push_back(const_int32_33);
     ptr_arraydecay11_2_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay11_2 = new GetElementPtrInst(ptr_inputs_117, ptr_arraydecay11_2_indices.begin(), ptr_arraydecay11_2_indices.end(), "arraydecay11.2", label_from_consts_exit_124);
-    StoreInst* void_168 = new StoreInst(ptr_arraydecay11_2, ptr_tmp8_156, false, label_from_consts_exit_124);
+    Instruction* ptr_arraydecay11_2 = new GetElementPtrInst(ptr_inputs_106, ptr_arraydecay11_2_indices.begin(), ptr_arraydecay11_2_indices.end(), "arraydecay11.2", label_from_consts_exit_113);
+    StoreInst* void_155 = new StoreInst(ptr_arraydecay11_2, ptr_tmp8, false, label_from_consts_exit_113);
     std::vector<Value*> ptr_arraydecay16_2_indices;
     ptr_arraydecay16_2_indices.push_back(const_int32_33);
     ptr_arraydecay16_2_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay16_2 = new GetElementPtrInst(ptr_results_116, ptr_arraydecay16_2_indices.begin(), ptr_arraydecay16_2_indices.end(), "arraydecay16.2", label_from_consts_exit_124);
-    StoreInst* void_169 = new StoreInst(ptr_arraydecay16_2, ptr_tmp12_157, false, label_from_consts_exit_124);
-    StoreInst* void_170 = new StoreInst(const_int32_29, ptr_tmp, false, label_from_consts_exit_124);
-    CallInst* void_171 = new CallInst(func_execute_shader, ptr_args_127, "", label_from_consts_exit_124);
-    void_171->setCallingConv(CallingConv::C);
-    void_171->setTailCall(false);const ParamAttrsList *void_171_PAL = 0;
-    void_171->setParamAttrs(void_171_PAL);
-    
-    LoadInst* int32_tmp23_2 = new LoadInst(ptr_tmp, "tmp23.2", false, label_from_consts_exit_124);
-    BinaryOperator* int32_shl_2 = BinaryOperator::create(Instruction::Shl, int32_tmp23_2, const_int32_33, "shl.2", label_from_consts_exit_124);
-    BinaryOperator* int32_or_2 = BinaryOperator::create(Instruction::Or, int32_shl_2, int32_or_1, "or.2", label_from_consts_exit_124);
+    Instruction* ptr_arraydecay16_2 = new GetElementPtrInst(ptr_results_105, ptr_arraydecay16_2_indices.begin(), ptr_arraydecay16_2_indices.end(), "arraydecay16.2", label_from_consts_exit_113);
+    StoreInst* void_156 = new StoreInst(ptr_arraydecay16_2, ptr_tmp12, false, label_from_consts_exit_113);
+    StoreInst* void_157 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_from_consts_exit_113);
+    CallInst* void_158 = new CallInst(func_execute_shader, ptr_args_116, "", label_from_consts_exit_113);
+    void_158->setCallingConv(CallingConv::C);
+    void_158->setTailCall(false);const ParamAttrsList *void_158_PAL = 0;
+    void_158->setParamAttrs(void_158_PAL);
+    
+    LoadInst* int32_tmp23_2 = new LoadInst(ptr_tmp_117, "tmp23.2", false, label_from_consts_exit_113);
+    BinaryOperator* int32_shl_2 = BinaryOperator::create(Instruction::Shl, int32_tmp23_2, const_int32_33, "shl.2", label_from_consts_exit_113);
+    BinaryOperator* int32_or_2 = BinaryOperator::create(Instruction::Or, int32_shl_2, int32_or_1, "or.2", label_from_consts_exit_113);
     std::vector<Value*> ptr_arraydecay11_3_indices;
     ptr_arraydecay11_3_indices.push_back(const_int32_34);
     ptr_arraydecay11_3_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay11_3 = new GetElementPtrInst(ptr_inputs_117, ptr_arraydecay11_3_indices.begin(), ptr_arraydecay11_3_indices.end(), "arraydecay11.3", label_from_consts_exit_124);
-    StoreInst* void_172 = new StoreInst(ptr_arraydecay11_3, ptr_tmp8_156, false, label_from_consts_exit_124);
+    Instruction* ptr_arraydecay11_3 = new GetElementPtrInst(ptr_inputs_106, ptr_arraydecay11_3_indices.begin(), ptr_arraydecay11_3_indices.end(), "arraydecay11.3", label_from_consts_exit_113);
+    StoreInst* void_159 = new StoreInst(ptr_arraydecay11_3, ptr_tmp8, false, label_from_consts_exit_113);
     std::vector<Value*> ptr_arraydecay16_3_indices;
     ptr_arraydecay16_3_indices.push_back(const_int32_34);
     ptr_arraydecay16_3_indices.push_back(const_int32_29);
-    Instruction* ptr_arraydecay16_3 = new GetElementPtrInst(ptr_results_116, ptr_arraydecay16_3_indices.begin(), ptr_arraydecay16_3_indices.end(), "arraydecay16.3", label_from_consts_exit_124);
-    StoreInst* void_173 = new StoreInst(ptr_arraydecay16_3, ptr_tmp12_157, false, label_from_consts_exit_124);
-    StoreInst* void_174 = new StoreInst(const_int32_29, ptr_tmp, false, label_from_consts_exit_124);
-    CallInst* void_175 = new CallInst(func_execute_shader, ptr_args_127, "", label_from_consts_exit_124);
-    void_175->setCallingConv(CallingConv::C);
-    void_175->setTailCall(false);const ParamAttrsList *void_175_PAL = 0;
-    void_175->setParamAttrs(void_175_PAL);
-    
-    LoadInst* int32_tmp23_3 = new LoadInst(ptr_tmp, "tmp23.3", false, label_from_consts_exit_124);
-    BinaryOperator* int32_shl_3 = BinaryOperator::create(Instruction::Shl, int32_tmp23_3, const_int32_34, "shl.3", label_from_consts_exit_124);
-    BinaryOperator* int32_or_3 = BinaryOperator::create(Instruction::Or, int32_shl_3, int32_or_2, "or.3", label_from_consts_exit_124);
-    BinaryOperator* int32_neg = BinaryOperator::create(Instruction::Xor, int32_or_3, const_int32_30, "neg", label_from_consts_exit_124);
-    new ReturnInst(int32_neg, label_from_consts_exit_124);
+    Instruction* ptr_arraydecay16_3 = new GetElementPtrInst(ptr_results_105, ptr_arraydecay16_3_indices.begin(), ptr_arraydecay16_3_indices.end(), "arraydecay16.3", label_from_consts_exit_113);
+    StoreInst* void_160 = new StoreInst(ptr_arraydecay16_3, ptr_tmp12, false, label_from_consts_exit_113);
+    StoreInst* void_161 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_from_consts_exit_113);
+    CallInst* void_162 = new CallInst(func_execute_shader, ptr_args_116, "", label_from_consts_exit_113);
+    void_162->setCallingConv(CallingConv::C);
+    void_162->setTailCall(false);const ParamAttrsList *void_162_PAL = 0;
+    void_162->setParamAttrs(void_162_PAL);
+    
+    LoadInst* int32_tmp23_3 = new LoadInst(ptr_tmp_117, "tmp23.3", false, label_from_consts_exit_113);
+    BinaryOperator* int32_shl_3 = BinaryOperator::create(Instruction::Shl, int32_tmp23_3, const_int32_34, "shl.3", label_from_consts_exit_113);
+    BinaryOperator* int32_or_3 = BinaryOperator::create(Instruction::Or, int32_shl_3, int32_or_2, "or.3", label_from_consts_exit_113);
+    BinaryOperator* int32_neg = BinaryOperator::create(Instruction::Xor, int32_or_3, const_int32_30, "neg", label_from_consts_exit_113);
+    new ReturnInst(int32_neg, label_from_consts_exit_113);
     
     // Resolve Forward References
-    fwdref_138->replaceAllUsesWith(packed_tmp31_i_150); delete fwdref_138;
-    fwdref_136->replaceAllUsesWith(int32_indvar_next7); delete fwdref_136;
+    fwdref_128->replaceAllUsesWith(packed_tmp31_i_140); delete fwdref_128;
+    fwdref_126->replaceAllUsesWith(int32_indvar_next7); delete fwdref_126;
     
   }
   
diff --git a/src/mesa/pipe/llvm/llvm_entry.c b/src/mesa/pipe/llvm/llvm_entry.c
index c3b34584e1..fa50b60e66 100644
--- a/src/mesa/pipe/llvm/llvm_entry.c
+++ b/src/mesa/pipe/llvm/llvm_entry.c
@@ -86,8 +86,8 @@ struct ShaderInput
 
 extern void execute_shader(struct ShaderInput *input);
 
-void run_vertex_shader(float4 (*inputs)[16],
-                       float4 (*results)[16],
+void run_vertex_shader(void *inputs,
+                       void *results,
                        float (*aconsts)[4],
                        int num_vertices,
                        int num_inputs,
@@ -98,16 +98,16 @@ void run_vertex_shader(float4 (*inputs)[16],
    float4  temps[128];//MAX_PROGRAM_TEMPS
 
    struct ShaderInput args;
+   args.dests  = results;
+   args.inputs = inputs;
+
    /*printf("XXX LLVM run_vertex_shader vertices = %d, inputs = %d, attribs = %d, consts = %d\n",
      num_vertices, num_inputs, num_attribs, num_consts);*/
    from_consts(consts, aconsts, num_consts);
    args.consts = consts;
    args.temps = temps;
-   for (int i = 0; i < num_vertices; ++i) {
-      args.dests  = results[i];
-      args.inputs = inputs[i];
-      execute_shader(&args);
-   }
+
+   execute_shader(&args);
 }
 
 
diff --git a/src/mesa/pipe/llvm/loweringpass.cpp b/src/mesa/pipe/llvm/loweringpass.cpp
new file mode 100644
index 0000000000..556dbec366
--- /dev/null
+++ b/src/mesa/pipe/llvm/loweringpass.cpp
@@ -0,0 +1,17 @@
+#include "loweringpass.h"
+
+using namespace llvm;
+
+char LoweringPass::ID = 0;
+RegisterPass<LoweringPass> X("lowering", "Lowering Pass");
+
+LoweringPass::LoweringPass()
+   :  ModulePass((intptr_t)&ID)
+{
+}
+
+bool LoweringPass::runOnModule(Module &m)
+{
+   llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n";
+   return false;
+}
diff --git a/src/mesa/pipe/llvm/loweringpass.h b/src/mesa/pipe/llvm/loweringpass.h
new file mode 100644
index 0000000000..f62dcf6ba7
--- /dev/null
+++ b/src/mesa/pipe/llvm/loweringpass.h
@@ -0,0 +1,15 @@
+#ifndef LOWERINGPASS_H
+#define LOWERINGPASS_H
+
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+
+struct LoweringPass : public llvm::ModulePass
+{
+   static char ID;
+   LoweringPass();
+
+   virtual bool runOnModule(llvm::Module &m);
+};
+
+#endif
diff --git a/src/mesa/pipe/llvm/storagesoa.cpp b/src/mesa/pipe/llvm/storagesoa.cpp
new file mode 100644
index 0000000000..b2aca3557a
--- /dev/null
+++ b/src/mesa/pipe/llvm/storagesoa.cpp
@@ -0,0 +1,117 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "storagesoa.h"
+
+
+#include "pipe/p_shader_tokens.h"
+#include <llvm/BasicBlock.h>
+#include <llvm/Module.h>
+#include <llvm/Value.h>
+
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/InstrTypes.h>
+#include <llvm/Instructions.h>
+
+using namespace llvm;
+
+StorageSoa::StorageSoa(llvm::BasicBlock *block,
+                       llvm::Value *input,
+                       llvm::Value *output,
+                       llvm::Value *consts)
+{
+}
+
+void StorageSoa::addImmediate(float *vec)
+{
+}
+
+llvm::Value *StorageSoa::addrElement(int idx) const
+{
+   return 0;
+}
+
+std::vector<llvm::Value*> StorageSoa::inputElement(int idx, int swizzle,
+                                                   llvm::Value *indIdx)
+{
+   std::vector<llvm::Value*> res(4);
+
+   return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::constElement(int idx, int swizzle,
+                                                   llvm::Value *indIdx)
+{
+   std::vector<llvm::Value*> res(4);
+
+   return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::outputElement(int idx, int swizzle,
+                                                    llvm::Value *indIdx)
+{
+   std::vector<llvm::Value*> res(4);
+
+   return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::tempElement(int idx, int swizzle,
+                                                  llvm::Value *indIdx)
+{
+   std::vector<llvm::Value*> res(4);
+
+   return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::immediateElement(int idx, int swizzle)
+{
+   std::vector<llvm::Value*> res(4);
+
+   return res;
+}
+
+llvm::Value * StorageSoa::extractIndex(llvm::Value *vec)
+{
+   return 0;
+}
+
+void StorageSoa::storeOutput(int dstIdx, const std::vector<llvm::Value*> &val,
+                             int mask)
+{
+}
+
+void StorageSoa::storeTemp(int idx, const std::vector<llvm::Value*> &val,
+                           int mask)
+{
+}
+
+void StorageSoa::storeAddress(int idx, const std::vector<llvm::Value*> &val,
+                              int mask)
+{
+}
diff --git a/src/mesa/pipe/llvm/storagesoa.h b/src/mesa/pipe/llvm/storagesoa.h
new file mode 100644
index 0000000000..551b0b9734
--- /dev/null
+++ b/src/mesa/pipe/llvm/storagesoa.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef STORAGESOA_H
+#define STORAGESOA_H
+
+#include <vector>
+
+namespace llvm {
+   class BasicBlock;
+   class Constant;
+   class ConstantInt;
+   class LoadInst;
+   class Value;
+   class VectorType;
+}
+
+class StorageSoa
+{
+public:
+   StorageSoa(llvm::BasicBlock *block,
+              llvm::Value *input,
+              llvm::Value *output,
+              llvm::Value *consts);
+
+   void addImmediate(float *vec);
+
+   llvm::Value  * addrElement(int idx) const;
+
+   std::vector<llvm::Value*> inputElement(int idx, int swizzle, llvm::Value *indIdx =0);
+   std::vector<llvm::Value*> constElement(int idx, int swizzle, llvm::Value *indIdx =0);
+   std::vector<llvm::Value*> outputElement(int idx, int swizzle, llvm::Value *indIdx =0);
+   std::vector<llvm::Value*> tempElement(int idx, int swizzle, llvm::Value *indIdx =0);
+   std::vector<llvm::Value*> immediateElement(int idx, int swizzle);
+
+   llvm::Value *extractIndex(llvm::Value *vec);
+
+   void storeOutput(int dstIdx, const std::vector<llvm::Value*> &val,
+                         int mask);
+   void storeTemp(int idx, const std::vector<llvm::Value*> &val,
+                  int mask);
+   void storeAddress(int idx, const std::vector<llvm::Value*> &val,
+                     int mask);
+
+};
+
+#endif
diff --git a/src/mesa/pipe/llvm/tgsitollvm.cpp b/src/mesa/pipe/llvm/tgsitollvm.cpp
new file mode 100644
index 0000000000..bc4df61071
--- /dev/null
+++ b/src/mesa/pipe/llvm/tgsitollvm.cpp
@@ -0,0 +1,1174 @@
+#include "tgsitollvm.h"
+
+#include "gallivm.h"
+#include "gallivm_p.h"
+
+#include "storage.h"
+#include "instructions.h"
+#include "storagesoa.h"
+#include "instructionssoa.h"
+
+#include "pipe/p_shader_tokens.h"
+
+#include "pipe/tgsi/util/tgsi_parse.h"
+#include "pipe/tgsi/exec/tgsi_exec.h"
+#include "pipe/tgsi/util/tgsi_util.h"
+#include "pipe/tgsi/util/tgsi_build.h"
+#include "pipe/tgsi/util/tgsi_dump.h"
+
+
+#include <llvm/Module.h>
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
+#include <llvm/ParameterAttributes.h>
+#include <llvm/Support/PatternMatch.h>
+#include <llvm/ExecutionEngine/JIT.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+using namespace llvm;
+#include "llvm_base_shader.cpp"
+
+static inline void
+add_interpolator(struct gallivm_ir *ir,
+                 struct gallivm_interpolate *interp)
+{
+   ir->interpolators[ir->num_interp] = *interp;
+   ++ir->num_interp;
+}
+
+static void
+translate_declaration(struct gallivm_ir *prog,
+                      llvm::Module *module,
+                      Storage *storage,
+                      struct tgsi_full_declaration *decl,
+                      struct tgsi_full_declaration *fd)
+{
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      unsigned first, last, mask;
+      uint interp_method;
+
+      assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE);
+
+      first = decl->u.DeclarationRange.First;
+      last = decl->u.DeclarationRange.Last;
+      mask = decl->Declaration.UsageMask;
+
+      /* Do not touch WPOS.xy */
+      if (first == 0) {
+         mask &= ~TGSI_WRITEMASK_XY;
+         if (mask == TGSI_WRITEMASK_NONE) {
+            first++;
+            if (first > last) {
+               return;
+            }
+         }
+      }
+
+      interp_method = decl->Interpolation.Interpolate;
+
+      if (mask == TGSI_WRITEMASK_XYZW) {
+         unsigned i, j;
+
+         for (i = first; i <= last; i++) {
+            for (j = 0; j < NUM_CHANNELS; j++) {
+               //interp( mach, i, j );
+               struct gallivm_interpolate interp;
+               interp.type = interp_method;
+               interp.attrib = i;
+               interp.chan = j;
+               add_interpolator(prog, &interp);
+            }
+         }
+      } else {
+         unsigned i, j;
+         for( j = 0; j < NUM_CHANNELS; j++ ) {
+            if( mask & (1 << j) ) {
+               for( i = first; i <= last; i++ ) {
+                  struct gallivm_interpolate interp;
+                  interp.type = interp_method;
+                  interp.attrib = i;
+                  interp.chan = j;
+                  add_interpolator(prog, &interp);
+               }
+            }
+         }
+      }
+   }
+}
+
+static void
+translate_declarationir(struct gallivm_ir *,
+                      llvm::Module *,
+                      StorageSoa *,
+                      struct tgsi_full_declaration *,
+                      struct tgsi_full_declaration *)
+{
+}
+
+static void
+translate_immediate(Storage *storage,
+                    struct tgsi_full_immediate *imm)
+{
+   float vec[4];
+   int i;
+   for (i = 0; i < imm->Immediate.Size - 1; ++i) {
+      switch (imm->Immediate.DataType) {
+      case TGSI_IMM_FLOAT32:
+         vec[i] = imm->u.ImmediateFloat32[i].Float;
+         break;
+      default:
+         assert(0);
+      }
+   }
+   storage->addImmediate(vec);
+}
+
+
+static void
+translate_immediateir(StorageSoa *storage,
+                      struct tgsi_full_immediate *imm)
+{
+   float vec[4];
+   int i;
+   for (i = 0; i < imm->Immediate.Size - 1; ++i) {
+      switch (imm->Immediate.DataType) {
+      case TGSI_IMM_FLOAT32:
+         vec[i] = imm->u.ImmediateFloat32[i].Float;
+         break;
+      default:
+         assert(0);
+      }
+   }
+   storage->addImmediate(vec);
+}
+
+static inline int
+swizzleInt(struct tgsi_full_src_register *src)
+{
+   int swizzle = 0;
+   int start = 1000;
+
+   for (int k = 0; k < 4; ++k) {
+      swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start;
+      start /= 10;
+   }
+   return swizzle;
+}
+
+static inline llvm::Value *
+swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src,
+              Storage *storage)
+{
+   int swizzle = swizzleInt(src);
+   const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 +
+                          TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W;
+   if (swizzle != NO_SWIZZLE) {
+      /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/
+      val = storage->shuffleVector(val, swizzle);
+   }
+   return val;
+}
+
+static void
+translate_instruction(llvm::Module *module,
+                      Storage *storage,
+                      Instructions *instr,
+                      struct tgsi_full_instruction *inst,
+                      struct tgsi_full_instruction *fi,
+                      unsigned instno)
+{
+   llvm::Value *inputs[4];
+   inputs[0] = 0;
+   inputs[1] = 0;
+   inputs[2] = 0;
+   inputs[3] = 0;
+
+   for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
+      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      llvm::Value *val = 0;
+      llvm::Value *indIdx = 0;
+
+      if (src->SrcRegister.Indirect) {
+         indIdx = storage->addrElement(src->SrcRegisterInd.Index);
+         indIdx = storage->extractIndex(indIdx);
+      }
+      if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
+         val = storage->constElement(src->SrcRegister.Index, indIdx);
+      } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
+         val = storage->inputElement(src->SrcRegister.Index, indIdx);
+      } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+         val = storage->tempElement(src->SrcRegister.Index);
+      } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
+         val = storage->outputElement(src->SrcRegister.Index, indIdx);
+      } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+         val = storage->immediateElement(src->SrcRegister.Index);
+      } else {
+         fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
+         return;
+      }
+
+      inputs[i] = swizzleVector(val, src, storage);
+   }
+
+   /*if (inputs[0])
+     instr->printVector(inputs[0]);
+     if (inputs[1])
+     instr->printVector(inputs[1]);*/
+   llvm::Value *out = 0;
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ARL: {
+      out = instr->arl(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_MOV: {
+      out = inputs[0];
+   }
+      break;
+   case TGSI_OPCODE_LIT: {
+      out = instr->lit(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_RCP: {
+      out = instr->rcp(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_RSQ: {
+      out = instr->rsq(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_EXP:
+      break;
+   case TGSI_OPCODE_LOG:
+      break;
+   case TGSI_OPCODE_MUL: {
+      out = instr->mul(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_ADD: {
+      out = instr->add(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_DP3: {
+      out = instr->dp3(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_DP4: {
+      out = instr->dp4(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_DST: {
+      out = instr->dst(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_MIN: {
+      out = instr->min(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_MAX: {
+      out = instr->max(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_SLT: {
+      out = instr->slt(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_SGE: {
+      out = instr->sge(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_MAD: {
+      out = instr->madd(inputs[0], inputs[1], inputs[2]);
+   }
+      break;
+   case TGSI_OPCODE_SUB: {
+      out = instr->sub(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_LERP: {
+      out = instr->lerp(inputs[0], inputs[1], inputs[2]);
+   }
+      break;
+   case TGSI_OPCODE_CND:
+      break;
+   case TGSI_OPCODE_CND0:
+      break;
+   case TGSI_OPCODE_DOT2ADD:
+      break;
+   case TGSI_OPCODE_INDEX:
+      break;
+   case TGSI_OPCODE_NEGATE:
+      break;
+   case TGSI_OPCODE_FRAC: {
+      out = instr->frc(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_CLAMP:
+      break;
+   case TGSI_OPCODE_FLOOR: {
+      out = instr->floor(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_ROUND:
+      break;
+   case TGSI_OPCODE_EXPBASE2: {
+      out = instr->ex2(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_LOGBASE2: {
+      out = instr->lg2(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_POWER: {
+      out = instr->pow(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_CROSSPRODUCT: {
+      out = instr->cross(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_MULTIPLYMATRIX:
+      break;
+   case TGSI_OPCODE_ABS: {
+      out = instr->abs(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_RCC:
+      break;
+   case TGSI_OPCODE_DPH: {
+      out = instr->dph(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_COS: {
+      out = instr->cos(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_DDX:
+      break;
+   case TGSI_OPCODE_DDY:
+      break;
+   case TGSI_OPCODE_KILP: {
+      out = instr->kilp(inputs[0]);
+      storage->setKilElement(out);
+      return;
+   }
+      break;
+   case TGSI_OPCODE_PK2H:
+      break;
+   case TGSI_OPCODE_PK2US:
+      break;
+   case TGSI_OPCODE_PK4B:
+      break;
+   case TGSI_OPCODE_PK4UB:
+      break;
+   case TGSI_OPCODE_RFL:
+      break;
+   case TGSI_OPCODE_SEQ:
+      break;
+   case TGSI_OPCODE_SFL:
+      break;
+   case TGSI_OPCODE_SGT: {
+      out = instr->sgt(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_SIN: {
+      out = instr->sin(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_SLE:
+      break;
+   case TGSI_OPCODE_SNE:
+      break;
+   case TGSI_OPCODE_STR:
+      break;
+   case TGSI_OPCODE_TEX:
+      break;
+   case TGSI_OPCODE_TXD:
+      break;
+   case TGSI_OPCODE_UP2H:
+      break;
+   case TGSI_OPCODE_UP2US:
+      break;
+   case TGSI_OPCODE_UP4B:
+      break;
+   case TGSI_OPCODE_UP4UB:
+      break;
+   case TGSI_OPCODE_X2D:
+      break;
+   case TGSI_OPCODE_ARA:
+      break;
+   case TGSI_OPCODE_ARR:
+      break;
+   case TGSI_OPCODE_BRA:
+      break;
+   case TGSI_OPCODE_CAL: {
+      instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr());
+      return;
+   }
+      break;
+   case TGSI_OPCODE_RET: {
+      instr->end();
+      return;
+   }
+      break;
+   case TGSI_OPCODE_SSG:
+      break;
+   case TGSI_OPCODE_CMP: {
+      out = instr->cmp(inputs[0], inputs[1], inputs[2]);
+   }
+      break;
+   case TGSI_OPCODE_SCS: {
+      out = instr->scs(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_TXB:
+      break;
+   case TGSI_OPCODE_NRM:
+      break;
+   case TGSI_OPCODE_DIV:
+      break;
+   case TGSI_OPCODE_DP2:
+      break;
+   case TGSI_OPCODE_TXL:
+      break;
+   case TGSI_OPCODE_BRK: {
+      instr->brk();
+      return;
+   }
+      break;
+   case TGSI_OPCODE_IF: {
+      instr->ifop(inputs[0]);
+      storage->setCurrentBlock(instr->currentBlock());
+      return;  //just update the state
+   }
+      break;
+   case TGSI_OPCODE_LOOP:
+      break;
+   case TGSI_OPCODE_REP:
+      break;
+   case TGSI_OPCODE_ELSE: {
+      instr->elseop();
+      storage->setCurrentBlock(instr->currentBlock());
+      return; //only state update
+   }
+      break;
+   case TGSI_OPCODE_ENDIF: {
+      instr->endif();
+      storage->setCurrentBlock(instr->currentBlock());
+      return; //just update the state
+   }
+      break;
+   case TGSI_OPCODE_ENDLOOP:
+      break;
+   case TGSI_OPCODE_ENDREP:
+      break;
+   case TGSI_OPCODE_PUSHA:
+      break;
+   case TGSI_OPCODE_POPA:
+      break;
+   case TGSI_OPCODE_CEIL:
+      break;
+   case TGSI_OPCODE_I2F:
+      break;
+   case TGSI_OPCODE_NOT:
+      break;
+   case TGSI_OPCODE_TRUNC: {
+      out = instr->trunc(inputs[0]);
+   }
+      break;
+   case TGSI_OPCODE_SHL:
+      break;
+   case TGSI_OPCODE_SHR:
+      break;
+   case TGSI_OPCODE_AND:
+      break;
+   case TGSI_OPCODE_OR:
+      break;
+   case TGSI_OPCODE_MOD:
+      break;
+   case TGSI_OPCODE_XOR:
+      break;
+   case TGSI_OPCODE_SAD:
+      break;
+   case TGSI_OPCODE_TXF:
+      break;
+   case TGSI_OPCODE_TXQ:
+      break;
+   case TGSI_OPCODE_CONT:
+      break;
+   case TGSI_OPCODE_EMIT:
+      break;
+   case TGSI_OPCODE_ENDPRIM:
+      break;
+   case TGSI_OPCODE_BGNLOOP2: {
+      instr->beginLoop();
+      storage->setCurrentBlock(instr->currentBlock());
+      return;
+   }
+      break;
+   case TGSI_OPCODE_BGNSUB: {
+      instr->bgnSub(instno);
+      storage->setCurrentBlock(instr->currentBlock());
+      storage->pushTemps();
+      return;
+   }
+      break;
+   case TGSI_OPCODE_ENDLOOP2: {
+      instr->endLoop();
+      storage->setCurrentBlock(instr->currentBlock());
+      return;
+   }
+      break;
+   case TGSI_OPCODE_ENDSUB: {
+      instr->endSub();
+      storage->setCurrentBlock(instr->currentBlock());
+      storage->popArguments();
+      storage->popTemps();
+      return;
+   }
+      break;
+   case TGSI_OPCODE_NOISE1:
+      break;
+   case TGSI_OPCODE_NOISE2:
+      break;
+   case TGSI_OPCODE_NOISE3:
+      break;
+   case TGSI_OPCODE_NOISE4:
+      break;
+   case TGSI_OPCODE_NOP:
+      break;
+   case TGSI_OPCODE_TEXBEM:
+      break;
+   case TGSI_OPCODE_TEXBEML:
+      break;
+   case TGSI_OPCODE_TEXREG2AR:
+      break;
+   case TGSI_OPCODE_TEXM3X2PAD:
+      break;
+   case TGSI_OPCODE_TEXM3X2TEX:
+      break;
+   case TGSI_OPCODE_TEXM3X3PAD:
+      break;
+   case TGSI_OPCODE_TEXM3X3TEX:
+      break;
+   case TGSI_OPCODE_TEXM3X3SPEC:
+      break;
+   case TGSI_OPCODE_TEXM3X3VSPEC:
+      break;
+   case TGSI_OPCODE_TEXREG2GB:
+      break;
+   case TGSI_OPCODE_TEXREG2RGB:
+      break;
+   case TGSI_OPCODE_TEXDP3TEX:
+      break;
+   case TGSI_OPCODE_TEXDP3:
+      break;
+   case TGSI_OPCODE_TEXM3X3:
+      break;
+   case TGSI_OPCODE_TEXM3X2DEPTH:
+      break;
+   case TGSI_OPCODE_TEXDEPTH:
+      break;
+   case TGSI_OPCODE_BEM:
+      break;
+   case TGSI_OPCODE_M4X3:
+      break;
+   case TGSI_OPCODE_M3X4:
+      break;
+   case TGSI_OPCODE_M3X3:
+      break;
+   case TGSI_OPCODE_M3X2:
+      break;
+   case TGSI_OPCODE_NRM4:
+      break;
+   case TGSI_OPCODE_CALLNZ:
+      break;
+   case TGSI_OPCODE_IFC:
+      break;
+   case TGSI_OPCODE_BREAKC:
+      break;
+   case TGSI_OPCODE_KIL:
+      break;
+   case TGSI_OPCODE_END:
+      instr->end();
+      return;
+      break;
+   default:
+      fprintf(stderr, "ERROR: Unknown opcode %d\n",
+              inst->Instruction.Opcode);
+      assert(0);
+      break;
+   }
+
+   if (!out) {
+      fprintf(stderr, "ERROR: unsupported opcode %d\n",
+              inst->Instruction.Opcode);
+      assert(!"Unsupported opcode");
+   }
+
+   /* # not sure if we need this */
+   switch( inst->Instruction.Saturate ) {
+   case TGSI_SAT_NONE:
+      break;
+   case TGSI_SAT_ZERO_ONE:
+      /*TXT( "_SAT" );*/
+      break;
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      /*TXT( "_SAT[-1,1]" );*/
+      break;
+   default:
+      assert( 0 );
+   }
+
+   /* store results  */
+   for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+
+      if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+         storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
+         storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
+         storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      } else {
+         fprintf(stderr, "ERROR: unsupported LLVM destination!");
+         assert(!"wrong destination");
+      }
+   }
+}
+
+
+static void
+translate_instructionir(llvm::Module *module,
+                        StorageSoa *storage,
+                        InstructionsSoa *instr,
+                        struct tgsi_full_instruction *inst,
+                        struct tgsi_full_instruction *fi,
+                        unsigned instno)
+{
+   std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs);
+
+   for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
+      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      std::vector<llvm::Value*> val;
+      llvm::Value *indIdx = 0;
+      int swizzle = swizzleInt(src);
+
+      if (src->SrcRegister.Indirect) {
+         indIdx = storage->addrElement(src->SrcRegisterInd.Index);
+         indIdx = storage->extractIndex(indIdx);
+      }
+      if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
+         val = storage->constElement(src->SrcRegister.Index, swizzle, indIdx);
+      } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
+         val = storage->inputElement(src->SrcRegister.Index, swizzle, indIdx);
+      } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+         val = storage->tempElement(src->SrcRegister.Index, swizzle);
+      } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
+         val = storage->outputElement(src->SrcRegister.Index, swizzle, indIdx);
+      } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+         val = storage->immediateElement(src->SrcRegister.Index, swizzle);
+      } else {
+         fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
+         return;
+      }
+
+      inputs[i] = val;
+   }
+
+   std::vector<llvm::Value*> out(4);
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ARL: {
+   }
+      break;
+   case TGSI_OPCODE_MOV: {
+      out = inputs[0];
+   }
+      break;
+   case TGSI_OPCODE_LIT: {
+   }
+      break;
+   case TGSI_OPCODE_RCP: {
+   }
+      break;
+   case TGSI_OPCODE_RSQ: {
+   }
+      break;
+   case TGSI_OPCODE_EXP:
+      break;
+   case TGSI_OPCODE_LOG:
+      break;
+   case TGSI_OPCODE_MUL: {
+      out = instr->mul(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_ADD: {
+      out = instr->add(inputs[0], inputs[1]);
+   }
+      break;
+   case TGSI_OPCODE_DP3: {
+   }
+      break;
+   case TGSI_OPCODE_DP4: {
+   }
+      break;
+   case TGSI_OPCODE_DST: {
+   }
+      break;
+   case TGSI_OPCODE_MIN: {
+   }
+      break;
+   case TGSI_OPCODE_MAX: {
+   }
+      break;
+   case TGSI_OPCODE_SLT: {
+   }
+      break;
+   case TGSI_OPCODE_SGE: {
+   }
+      break;
+   case TGSI_OPCODE_MAD: {
+   }
+      break;
+   case TGSI_OPCODE_SUB: {
+   }
+      break;
+   case TGSI_OPCODE_LERP: {
+   }
+      break;
+   case TGSI_OPCODE_CND:
+      break;
+   case TGSI_OPCODE_CND0:
+      break;
+   case TGSI_OPCODE_DOT2ADD:
+      break;
+   case TGSI_OPCODE_INDEX:
+      break;
+   case TGSI_OPCODE_NEGATE:
+      break;
+   case TGSI_OPCODE_FRAC: {
+   }
+      break;
+   case TGSI_OPCODE_CLAMP:
+      break;
+   case TGSI_OPCODE_FLOOR: {
+   }
+      break;
+   case TGSI_OPCODE_ROUND:
+      break;
+   case TGSI_OPCODE_EXPBASE2: {
+   }
+      break;
+   case TGSI_OPCODE_LOGBASE2: {
+   }
+      break;
+   case TGSI_OPCODE_POWER: {
+   }
+      break;
+   case TGSI_OPCODE_CROSSPRODUCT: {
+   }
+      break;
+   case TGSI_OPCODE_MULTIPLYMATRIX:
+      break;
+   case TGSI_OPCODE_ABS: {
+   }
+      break;
+   case TGSI_OPCODE_RCC:
+      break;
+   case TGSI_OPCODE_DPH: {
+   }
+      break;
+   case TGSI_OPCODE_COS: {
+   }
+      break;
+   case TGSI_OPCODE_DDX:
+      break;
+   case TGSI_OPCODE_DDY:
+      break;
+   case TGSI_OPCODE_KILP: {
+   }
+      break;
+   case TGSI_OPCODE_PK2H:
+      break;
+   case TGSI_OPCODE_PK2US:
+      break;
+   case TGSI_OPCODE_PK4B:
+      break;
+   case TGSI_OPCODE_PK4UB:
+      break;
+   case TGSI_OPCODE_RFL:
+      break;
+   case TGSI_OPCODE_SEQ:
+      break;
+   case TGSI_OPCODE_SFL:
+      break;
+   case TGSI_OPCODE_SGT: {
+   }
+      break;
+   case TGSI_OPCODE_SIN: {
+   }
+      break;
+   case TGSI_OPCODE_SLE:
+      break;
+   case TGSI_OPCODE_SNE:
+      break;
+   case TGSI_OPCODE_STR:
+      break;
+   case TGSI_OPCODE_TEX:
+      break;
+   case TGSI_OPCODE_TXD:
+      break;
+   case TGSI_OPCODE_UP2H:
+      break;
+   case TGSI_OPCODE_UP2US:
+      break;
+   case TGSI_OPCODE_UP4B:
+      break;
+   case TGSI_OPCODE_UP4UB:
+      break;
+   case TGSI_OPCODE_X2D:
+      break;
+   case TGSI_OPCODE_ARA:
+      break;
+   case TGSI_OPCODE_ARR:
+      break;
+   case TGSI_OPCODE_BRA:
+      break;
+   case TGSI_OPCODE_CAL: {
+   }
+      break;
+   case TGSI_OPCODE_RET: {
+   }
+      break;
+   case TGSI_OPCODE_SSG:
+      break;
+   case TGSI_OPCODE_CMP: {
+   }
+      break;
+   case TGSI_OPCODE_SCS: {
+   }
+      break;
+   case TGSI_OPCODE_TXB:
+      break;
+   case TGSI_OPCODE_NRM:
+      break;
+   case TGSI_OPCODE_DIV:
+      break;
+   case TGSI_OPCODE_DP2:
+      break;
+   case TGSI_OPCODE_TXL:
+      break;
+   case TGSI_OPCODE_BRK: {
+   }
+      break;
+   case TGSI_OPCODE_IF: {
+   }
+      break;
+   case TGSI_OPCODE_LOOP:
+      break;
+   case TGSI_OPCODE_REP:
+      break;
+   case TGSI_OPCODE_ELSE: {
+   }
+      break;
+   case TGSI_OPCODE_ENDIF: {
+   }
+      break;
+   case TGSI_OPCODE_ENDLOOP:
+      break;
+   case TGSI_OPCODE_ENDREP:
+      break;
+   case TGSI_OPCODE_PUSHA:
+      break;
+   case TGSI_OPCODE_POPA:
+      break;
+   case TGSI_OPCODE_CEIL:
+      break;
+   case TGSI_OPCODE_I2F:
+      break;
+   case TGSI_OPCODE_NOT:
+      break;
+   case TGSI_OPCODE_TRUNC: {
+   }
+      break;
+   case TGSI_OPCODE_SHL:
+      break;
+   case TGSI_OPCODE_SHR:
+      break;
+   case TGSI_OPCODE_AND:
+      break;
+   case TGSI_OPCODE_OR:
+      break;
+   case TGSI_OPCODE_MOD:
+      break;
+   case TGSI_OPCODE_XOR:
+      break;
+   case TGSI_OPCODE_SAD:
+      break;
+   case TGSI_OPCODE_TXF:
+      break;
+   case TGSI_OPCODE_TXQ:
+      break;
+   case TGSI_OPCODE_CONT:
+      break;
+   case TGSI_OPCODE_EMIT:
+      break;
+   case TGSI_OPCODE_ENDPRIM:
+      break;
+   case TGSI_OPCODE_BGNLOOP2: {
+   }
+      break;
+   case TGSI_OPCODE_BGNSUB: {
+   }
+      break;
+   case TGSI_OPCODE_ENDLOOP2: {
+   }
+      break;
+   case TGSI_OPCODE_ENDSUB: {
+   }
+      break;
+   case TGSI_OPCODE_NOISE1:
+      break;
+   case TGSI_OPCODE_NOISE2:
+      break;
+   case TGSI_OPCODE_NOISE3:
+      break;
+   case TGSI_OPCODE_NOISE4:
+      break;
+   case TGSI_OPCODE_NOP:
+      break;
+   case TGSI_OPCODE_TEXBEM:
+      break;
+   case TGSI_OPCODE_TEXBEML:
+      break;
+   case TGSI_OPCODE_TEXREG2AR:
+      break;
+   case TGSI_OPCODE_TEXM3X2PAD:
+      break;
+   case TGSI_OPCODE_TEXM3X2TEX:
+      break;
+   case TGSI_OPCODE_TEXM3X3PAD:
+      break;
+   case TGSI_OPCODE_TEXM3X3TEX:
+      break;
+   case TGSI_OPCODE_TEXM3X3SPEC:
+      break;
+   case TGSI_OPCODE_TEXM3X3VSPEC:
+      break;
+   case TGSI_OPCODE_TEXREG2GB:
+      break;
+   case TGSI_OPCODE_TEXREG2RGB:
+      break;
+   case TGSI_OPCODE_TEXDP3TEX:
+      break;
+   case TGSI_OPCODE_TEXDP3:
+      break;
+   case TGSI_OPCODE_TEXM3X3:
+      break;
+   case TGSI_OPCODE_TEXM3X2DEPTH:
+      break;
+   case TGSI_OPCODE_TEXDEPTH:
+      break;
+   case TGSI_OPCODE_BEM:
+      break;
+   case TGSI_OPCODE_M4X3:
+      break;
+   case TGSI_OPCODE_M3X4:
+      break;
+   case TGSI_OPCODE_M3X3:
+      break;
+   case TGSI_OPCODE_M3X2:
+      break;
+   case TGSI_OPCODE_NRM4:
+      break;
+   case TGSI_OPCODE_CALLNZ:
+      break;
+   case TGSI_OPCODE_IFC:
+      break;
+   case TGSI_OPCODE_BREAKC:
+      break;
+   case TGSI_OPCODE_KIL:
+      break;
+   case TGSI_OPCODE_END:
+      instr->end();
+      return;
+      break;
+   default:
+      fprintf(stderr, "ERROR: Unknown opcode %d\n",
+              inst->Instruction.Opcode);
+      assert(0);
+      break;
+   }
+
+   if (!out[0]) {
+      fprintf(stderr, "ERROR: unsupported opcode %d\n",
+              inst->Instruction.Opcode);
+      assert(!"Unsupported opcode");
+   }
+
+   /* store results  */
+   for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+
+      if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+         storage->storeOutput(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
+         storage->storeTemp(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
+         storage->storeAddress(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      } else {
+         fprintf(stderr, "ERROR: unsupported LLVM destination!");
+         assert(!"wrong destination");
+      }
+   }
+}
+
+llvm::Module *
+tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens)
+{
+   llvm::Module *mod = createBaseShader();
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction fi;
+   struct tgsi_full_declaration fd;
+   unsigned instno = 0;
+   Function* shader = mod->getFunction("execute_shader");
+   std::ostringstream stream;
+   if (ir->type == GALLIVM_VS) {
+      stream << "vs_shader";
+   } else {
+      stream << "fs_shader";
+   }
+   stream << ir->id;
+   std::string func_name = stream.str();
+   shader->setName(func_name.c_str());
+
+   Function::arg_iterator args = shader->arg_begin();
+   Value *ptr_INPUT = args++;
+   ptr_INPUT->setName("input");
+
+   BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
+
+   tgsi_parse_init(&parse, tokens);
+
+   fi = tgsi_default_full_instruction();
+   fd = tgsi_default_full_declaration();
+   Storage storage(label_entry, ptr_INPUT);
+   Instructions instr(mod, shader, label_entry, &storage);
+   while(!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         translate_declaration(ir, mod, &storage,
+                               &parse.FullToken.FullDeclaration,
+                               &fd);
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         translate_immediate(&storage,
+                             &parse.FullToken.FullImmediate);
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         translate_instruction(mod, &storage, &instr,
+                               &parse.FullToken.FullInstruction,
+                               &fi, instno);
+         ++instno;
+         break;
+
+      default:
+         assert(0);
+      }
+   }
+
+   tgsi_parse_free(&parse);
+
+   ir->num_consts = storage.numConsts();
+   return mod;
+}
+
+llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
+                              const struct tgsi_token *tokens)
+{
+   llvm::Module *mod = createBaseShader();
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction fi;
+   struct tgsi_full_declaration fd;
+   unsigned instno = 0;
+   Function* shader = mod->getFunction("execute_shader");
+   std::ostringstream stream;
+   if (ir->type == GALLIVM_VS) {
+      stream << "vs_shader";
+   } else {
+      stream << "fs_shader";
+   }
+   stream << ir->id;
+   std::string func_name = stream.str();
+   shader->setName(func_name.c_str());
+
+   Function::arg_iterator args = shader->arg_begin();
+   Value *input = args++;
+   input->setName("input");
+   Value *output = args++;
+   output->setName("output");
+   Value *consts = args++;
+   consts->setName("consts");
+
+   BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
+
+   tgsi_parse_init(&parse, tokens);
+
+   fi = tgsi_default_full_instruction();
+   fd = tgsi_default_full_declaration();
+
+   StorageSoa storage(label_entry, input, output, consts);
+   InstructionsSoa instr(mod, shader, label_entry, &storage);
+
+   while(!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         translate_declarationir(ir, mod, &storage,
+                                 &parse.FullToken.FullDeclaration,
+                                 &fd);
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         translate_immediateir(&storage,
+                             &parse.FullToken.FullImmediate);
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         translate_instructionir(mod, &storage, &instr,
+                                 &parse.FullToken.FullInstruction,
+                                 &fi, instno);
+         ++instno;
+         break;
+
+      default:
+         assert(0);
+      }
+   }
+
+   tgsi_parse_free(&parse);
+
+   return mod;
+}
diff --git a/src/mesa/pipe/llvm/tgsitollvm.h b/src/mesa/pipe/llvm/tgsitollvm.h
new file mode 100644
index 0000000000..7ada04d629
--- /dev/null
+++ b/src/mesa/pipe/llvm/tgsitollvm.h
@@ -0,0 +1,20 @@
+#ifndef TGSITOLLVM_H
+#define TGSITOLLVM_H
+
+
+namespace llvm {
+   class Module;
+}
+
+struct gallivm_ir;
+struct tgsi_token;
+
+
+llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir,
+                            const struct tgsi_token *tokens);
+
+
+llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
+                              const struct tgsi_token *tokens);
+
+#endif
diff --git a/src/mesa/pipe/p_state.h b/src/mesa/pipe/p_state.h
index 83ca43f678..4d3a6b2f41 100644
--- a/src/mesa/pipe/p_state.h
+++ b/src/mesa/pipe/p_state.h
@@ -234,14 +234,9 @@ struct pipe_sampler_state
    unsigned compare_mode:1;  /**< PIPE_TEX_COMPARE_x */
    unsigned compare_func:3;  /**< PIPE_FUNC_x */
    unsigned normalized_coords:1;  /**< Are coords normalized to [0,1]? */
-   float shadow_ambient; /**< shadow test fail color/intensity */
-   float min_lod;
-   float max_lod;
-   float lod_bias;
-#if 0 /* need these? */
-   int BaseLevel;     /**< min mipmap level, OpenGL 1.2 */
-   int MaxLevel;      /**< max mipmap level, OpenGL 1.2 */
-#endif
+   float shadow_ambient;          /**< shadow test fail color/intensity */
+   float lod_bias;                /**< LOD/lambda bias */
+   float min_lod, max_lod;        /**< LOD clamp range, after bias */
    float border_color[4];
    float max_anisotropy;
 };
@@ -277,8 +272,7 @@ struct pipe_texture
    enum pipe_texture_target target; /**< PIPE_TEXTURE_x */
    enum pipe_format format;         /**< PIPE_FORMAT_x */
 
-   unsigned first_level;
-   unsigned last_level;
+   unsigned last_level;    /**< Index of last mipmap level present/defined */
 
    unsigned width[PIPE_MAX_TEXTURE_LEVELS];
    unsigned height[PIPE_MAX_TEXTURE_LEVELS];
diff --git a/src/mesa/pipe/p_winsys.h b/src/mesa/pipe/p_winsys.h
index 95e3684008..1e81eebd78 100644
--- a/src/mesa/pipe/p_winsys.h
+++ b/src/mesa/pipe/p_winsys.h
@@ -112,7 +112,7 @@ struct pipe_winsys
 
    /** 
     * Map the entire data store of a buffer object into the client's address.
-    * flags is bitmask of PIPE_BUFFER_FLAG_READ/WRITE. 
+    * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. 
     */
    void *(*buffer_map)( struct pipe_winsys *sws, 
 			struct pipe_buffer *buf,
diff --git a/src/mesa/pipe/softpipe/sp_state_fs.c b/src/mesa/pipe/softpipe/sp_state_fs.c
index 1430be7869..0b814fc284 100644
--- a/src/mesa/pipe/softpipe/sp_state_fs.c
+++ b/src/mesa/pipe/softpipe/sp_state_fs.c
@@ -61,12 +61,16 @@ softpipe_create_fs_state(struct pipe_context *pipe,
    }
 
 #ifdef MESA_LLVM
-   state->llvm_prog = gallivm_from_tgsi(state->shader.tokens, GALLIVM_FS);
+   state->llvm_prog = 0;
+
+#if 0
    if (!gallivm_global_cpu_engine()) {
       gallivm_cpu_engine_create(state->llvm_prog);
    }
    else
       gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog);
+#endif
+
 #elif defined(__i386__) || defined(__386__)
    if (softpipe->use_sse) {
       x86_init_func( &state->sse2_program );
diff --git a/src/mesa/pipe/softpipe/sp_state_sampler.c b/src/mesa/pipe/softpipe/sp_state_sampler.c
index 291bbc40ad..6a5a643c89 100644
--- a/src/mesa/pipe/softpipe/sp_state_sampler.c
+++ b/src/mesa/pipe/softpipe/sp_state_sampler.c
@@ -34,6 +34,8 @@
 #include "sp_state.h"
 #include "sp_texture.h"
 #include "sp_tile_cache.h"
+#include "pipe/draw/draw_context.h"
+
 
 
 void *
@@ -73,6 +75,8 @@ softpipe_set_sampler_texture(struct pipe_context *pipe,
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
 
+   draw_flush(softpipe->draw);
+
    assert(unit < PIPE_MAX_SAMPLERS);
    softpipe->texture[unit] = softpipe_texture(texture);  /* ptr, not struct */
 
diff --git a/src/mesa/pipe/softpipe/sp_tex_sample.c b/src/mesa/pipe/softpipe/sp_tex_sample.c
index 5e215c433a..325bdb86da 100644
--- a/src/mesa/pipe/softpipe/sp_tex_sample.c
+++ b/src/mesa/pipe/softpipe/sp_tex_sample.c
@@ -449,7 +449,6 @@ compute_lambda(struct tgsi_sampler *sampler,
    }
 
    lambda = LOG2(rho);
-
    lambda += lodbias + sampler->state->lod_bias;
    lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod);
 
@@ -457,7 +456,6 @@ compute_lambda(struct tgsi_sampler *sampler,
 }
 
 
-
 /**
  * Do several things here:
  * 1. Compute lambda from the texcoords, if needed
@@ -477,7 +475,7 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
    if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
       /* no mipmap selection needed */
       *imgFilter = sampler->state->mag_img_filter;
-      *level0 = *level1 = sampler->texture->first_level;
+      *level0 = *level1 = (int) sampler->state->min_lod;
    }
    else {
       float lambda;
@@ -492,7 +490,7 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
       if (lambda < 0.0) { /* XXX threshold depends on the filter */
          /* magnifying */
          *imgFilter = sampler->state->mag_img_filter;
-         *level0 = *level1 = sampler->texture->first_level;
+         *level0 = *level1 = 0;
       }
       else {
          /* minifying */
@@ -503,19 +501,13 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
             /* Nearest mipmap level */
             const int lvl = (int) (lambda + 0.5);
             *level0 =
-            *level1 = CLAMP(lvl,
-                            (int) sampler->texture->first_level,
-                            (int) sampler->texture->last_level);
+            *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level);
          }
          else {
             /* Linear interpolation between mipmap levels */
             const int lvl = (int) lambda;
-            *level0 = CLAMP(lvl,
-                            (int) sampler->texture->first_level,
-                            (int) sampler->texture->last_level);
-            *level1 = CLAMP(lvl + 1,
-                            (int) sampler->texture->first_level,
-                            (int) sampler->texture->last_level);
+            *level0 = CLAMP(lvl,     0, (int) sampler->texture->last_level);
+            *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level);
             *levelBlend = FRAC(lambda);  /* blending weight between levels */
          }
       }
diff --git a/src/mesa/pipe/softpipe/sp_texture.c b/src/mesa/pipe/softpipe/sp_texture.c
index fd2cc3dbbb..6de7a9b543 100644
--- a/src/mesa/pipe/softpipe/sp_texture.c
+++ b/src/mesa/pipe/softpipe/sp_texture.c
@@ -61,7 +61,7 @@ softpipe_texture_layout(struct softpipe_texture * spt)
 
    spt->buffer_size = 0;
 
-   for ( level = pt->first_level ; level <= pt->last_level ; level++ ) {
+   for (level = 0; level <= pt->last_level; level++) {
       pt->width[level] = width;
       pt->height[level] = height;
       pt->depth[level] = depth;
@@ -139,6 +139,8 @@ softpipe_get_tex_surface(struct pipe_context *pipe,
    struct softpipe_texture *spt = softpipe_texture(pt);
    struct pipe_surface *ps;
 
+   assert(level <= pt->last_level);
+
    ps = pipe->winsys->surface_alloc(pipe->winsys);
    if (ps) {
       assert(ps->refcount);
author	Ben Skeggs <skeggsb@gmail.com>	2008-02-13 14:21:23 +1100
committer	Ben Skeggs <skeggsb@gmail.com>	2008-02-13 14:21:23 +1100
commit	b397a2bb203c2b28b746af7828d9ad192cde0bc1 (patch)
tree	77448bf1db79ae112f057717109aaa98e5696b15 /src/mesa/pipe
parent	5ba3dbe2cc8a9af5cae01f45eaf497f834400170 (diff)
parent	e20e89e48287808068086ec148920dd89495d813 (diff)