From f141abdc8fdbff41e16b0ce53fa3fa8fba32a7f9 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sun, 8 Aug 2010 01:13:26 +0800
Subject: draw: Add flags to draw_prim_info.

A primitive may be split in frontends.  The split primitives
should convey certain flag bits so that the decomposer can correctly
decide the stipple or edge flags.

This commit adds flags to draw_prim_info and updates the decomposer to
honor the flags.  Frontends and middle ends will be updated later.
---
 src/gallium/auxiliary/draw/draw_decompose_tmp.h    | 26 +++++++++++++---------
 src/gallium/auxiliary/draw/draw_gs.c               |  1 +
 src/gallium/auxiliary/draw/draw_gs_tmp.h           |  1 +
 src/gallium/auxiliary/draw/draw_pipe.c             |  4 ++++
 src/gallium/auxiliary/draw/draw_private.h          |  5 +++++
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |  3 +++
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c       |  3 +++
 src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h    |  3 ++-
 src/gallium/auxiliary/draw/draw_so_emit_tmp.h      |  1 +
 9 files changed, 36 insertions(+), 11 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
index a52d2b5058..be3a997c3d 100644
--- a/src/gallium/auxiliary/draw/draw_decompose_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
@@ -54,10 +54,10 @@ FUNC(FUNC_VARS)
 
    FUNC_ENTER;
 
-   /* prim, count, and last_vertex_last should have been defined */
+   /* prim, prim_flags, count, and last_vertex_last should have been defined */
    if (0) {
-      debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n",
-            __FUNCTION__, prim, count, last_vertex_last);
+      debug_printf("%s: prim 0x%x, prim_flags 0x%x, count %d, last_vertex_last %d\n",
+            __FUNCTION__, prim, prim_flags, count, last_vertex_last);
    }
 
    switch (prim) {
@@ -80,7 +80,7 @@ FUNC(FUNC_VARS)
    case PIPE_PRIM_LINE_LOOP:
    case PIPE_PRIM_LINE_STRIP:
       if (count >= 2) {
-         flags = DRAW_PIPE_RESET_STIPPLE;
+         flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
          idx[1] = GET_ELT(0);
          idx[2] = idx[1];
 
@@ -90,7 +90,7 @@ FUNC(FUNC_VARS)
             LINE(flags, idx[0], idx[1]);
          }
          /* close the loop */
-         if (prim == PIPE_PRIM_LINE_LOOP)
+         if (prim == PIPE_PRIM_LINE_LOOP && !prim_flags)
             LINE(flags, idx[1], idx[2]);
       }
       break;
@@ -255,17 +255,23 @@ FUNC(FUNC_VARS)
 
          if (last_vertex_last) {
             flags = (DRAW_PIPE_RESET_STIPPLE |
-                     DRAW_PIPE_EDGE_FLAG_2 |
                      DRAW_PIPE_EDGE_FLAG_0);
+            if (!(prim_flags & DRAW_SPLIT_BEFORE))
+               flags |= DRAW_PIPE_EDGE_FLAG_1;
+
             edge_next = DRAW_PIPE_EDGE_FLAG_0;
-            edge_finish = DRAW_PIPE_EDGE_FLAG_1;
+            edge_finish =
+               (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_1;
          }
          else {
             flags = (DRAW_PIPE_RESET_STIPPLE |
-                     DRAW_PIPE_EDGE_FLAG_0 |
                      DRAW_PIPE_EDGE_FLAG_1);
+            if (!(prim_flags & DRAW_SPLIT_BEFORE))
+               flags |= DRAW_PIPE_EDGE_FLAG_0;
+
             edge_next = DRAW_PIPE_EDGE_FLAG_1;
-            edge_finish = DRAW_PIPE_EDGE_FLAG_2;
+            edge_finish =
+               (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_2;
          }
 
          idx[0] = GET_ELT(0);
@@ -300,7 +306,7 @@ FUNC(FUNC_VARS)
 
    case PIPE_PRIM_LINE_STRIP_ADJACENCY:
       if (count >= 4) {
-         flags = DRAW_PIPE_RESET_STIPPLE;
+         flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
          idx[1] = GET_ELT(0);
          idx[2] = GET_ELT(1);
          idx[3] = GET_ELT(2);
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 4a1013e79a..592f71bfbe 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -457,6 +457,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
    output_prims->start = 0;
    output_prims->count = shader->emitted_vertices;
    output_prims->prim = shader->output_primitive;
+   output_prims->flags = 0x0;
    output_prims->primitive_lengths = shader->primitive_lengths;
    output_prims->primitive_count = shader->emitted_primitives;
    output_verts->count = shader->emitted_vertices;
diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h
index 4a17af0dea..7c8a9f9cfc 100644
--- a/src/gallium/auxiliary/draw/draw_gs_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h
@@ -12,6 +12,7 @@
    const boolean last_vertex_last =                               \
       !(draw->rasterizer->flatshade &&                            \
         draw->rasterizer->flatshade_first);                       \
+   const unsigned prim_flags = input_prims->flags;                \
    do {                                                           \
       debug_assert(input_prims->primitive_count == 1);            \
       switch (prim) {                                             \
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 58995e0724..6a9e4d5e90 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -207,6 +207,7 @@ static void do_triangle( struct draw_context *draw,
 #define FUNC_VARS                               \
     struct draw_context *draw,                  \
     unsigned prim,                              \
+    unsigned prim_flags,                        \
     struct vertex_header *vertices,             \
     unsigned stride,                            \
     const ushort *elts,                         \
@@ -261,6 +262,7 @@ void draw_pipeline_run( struct draw_context *draw,
 
       pipe_run_elts(draw,
                     prim_info->prim,
+                    prim_info->flags,
                     vert_info->verts,
                     vert_info->stride,
                     prim_info->elts + start,
@@ -298,6 +300,7 @@ void draw_pipeline_run( struct draw_context *draw,
 #define FUNC_VARS                      \
     struct draw_context *draw,         \
     unsigned prim,                     \
+    unsigned prim_flags,               \
     struct vertex_header *vertices,    \
     unsigned stride,                   \
     unsigned count
@@ -330,6 +333,7 @@ void draw_pipeline_run_linear( struct draw_context *draw,
 
       pipe_run_linear(draw,
                       prim_info->prim,
+                      prim_info->flags,
                       (struct vertex_header*)verts,
                       vert_info->stride,
                       count);
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 397d4bf653..826f5dc98c 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -296,6 +296,10 @@ struct draw_vertex_info {
    unsigned count;
 };
 
+/* these flags are set if the primitive is a segment of a larger one */
+#define DRAW_SPLIT_BEFORE 0x1
+#define DRAW_SPLIT_AFTER  0x2
+
 struct draw_prim_info {
    boolean linear;
    unsigned start;
@@ -304,6 +308,7 @@ struct draw_prim_info {
    unsigned count;
 
    unsigned prim;
+   unsigned flags;
    unsigned *primitive_lengths;
    unsigned primitive_count;
 };
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 5b16c3788e..92588cd7f8 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -311,6 +311,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
    prim_info.prim = fpme->input_prim;
+   prim_info.flags = 0x0;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &draw_count;
 
@@ -336,6 +337,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
    prim_info.count = count;
    prim_info.elts = NULL;
    prim_info.prim = fpme->input_prim;
+   prim_info.flags = 0x0;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &count;
 
@@ -364,6 +366,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
    prim_info.prim = fpme->input_prim;
+   prim_info.flags = 0x0;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &draw_count;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 4b99bee86a..46701f11b5 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -310,6 +310,7 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
    prim_info.prim = fpme->input_prim;
+   prim_info.flags = 0x0;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &draw_count;
 
@@ -335,6 +336,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
    prim_info.count = count;
    prim_info.elts = NULL;
    prim_info.prim = fpme->input_prim;
+   prim_info.flags = 0x0;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &count;
 
@@ -364,6 +366,7 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
    prim_info.prim = fpme->input_prim;
+   prim_info.flags = 0x0;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &draw_count;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
index 1a3748d5f0..8a841e83f2 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -10,7 +10,8 @@
    struct draw_context *draw = vcache->draw;                               \
    const unsigned prim = vcache->input_prim;                               \
    const boolean last_vertex_last = !(draw->rasterizer->flatshade &&       \
-                                      draw->rasterizer->flatshade_first);
+                                      draw->rasterizer->flatshade_first);  \
+   const unsigned prim_flags = 0x0;
 
 #define GET_ELT(idx) (get_elt(elts, idx) + elt_bias)
 
diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
index 6d8937a0b4..1446e81bba 100644
--- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
@@ -12,6 +12,7 @@
    const boolean last_vertex_last =                               \
       !(draw->rasterizer->flatshade &&                            \
         draw->rasterizer->flatshade_first);                       \
+   const unsigned prim_flags = input_prims->flags;                \
    do {                                                           \
       debug_assert(input_prims->primitive_count == 1);            \
       switch (prim) {                                             \
-- 
cgit v1.2.3


From f132498347c41294042db0cc6830abe928d827de Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sun, 8 Aug 2010 00:53:02 +0800
Subject: draw: Add prim flags to middle ends.

Update the middle end interface to pass the primitive flags from the
frontends to the pipeline.  No frontend sets the flags yet.
---
 src/gallium/auxiliary/draw/draw_pt.h                      | 11 ++++++++---
 src/gallium/auxiliary/draw/draw_pt_fetch_emit.c           |  9 ++++++---
 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c     |  9 ++++++---
 src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 15 +++++++++------
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c    | 15 +++++++++------
 src/gallium/auxiliary/draw/draw_pt_varray.c               |  8 +++++---
 src/gallium/auxiliary/draw/draw_pt_vcache.c               |  5 +++--
 7 files changed, 46 insertions(+), 26 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 44356fba4c..8d69b8c8c8 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -80,6 +80,8 @@ struct draw_pt_front_end {
 /* The "middle end" - prepares actual hardware vertices for the
  * hardware backend.
  *
+ * prim_flags is as defined by draw_prim_info::flags (DRAW_SPLIT_*).
+ *
  * Currently two versions of this:
  *     - fetch, vertex shade, cliptest, prim-pipeline
  *     - fetch, emit (ie passthrough)
@@ -94,11 +96,13 @@ struct draw_pt_middle_end {
                 const unsigned *fetch_elts,
                 unsigned fetch_count,
                 const ushort *draw_elts,
-                unsigned draw_count );
+                unsigned draw_count,
+                unsigned prim_flags );
 
    void (*run_linear)(struct draw_pt_middle_end *,
                       unsigned start,
-                      unsigned count);
+                      unsigned count,
+                      unsigned prim_flags );
 
    /* Transform all vertices in a linear range and then draw them with
     * the supplied element list.  May fail and return FALSE.
@@ -107,7 +111,8 @@ struct draw_pt_middle_end {
                             unsigned fetch_start,
                             unsigned fetch_count,
                             const ushort *draw_elts,
-                            unsigned draw_count );
+                            unsigned draw_count,
+                            unsigned prim_flags );
 
    int (*get_max_vertex_count)( struct draw_pt_middle_end * );
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 5c8af17c8e..d826e79dbf 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -210,7 +210,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
                             const unsigned *fetch_elts,
                             unsigned fetch_count,
                             const ushort *draw_elts,
-                            unsigned draw_count )
+                            unsigned draw_count,
+                            unsigned prim_flags )
 {
    struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
    struct draw_context *draw = feme->draw;
@@ -273,7 +274,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
 
 static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
                                    unsigned start,
-                                   unsigned count )
+                                   unsigned count,
+                                   unsigned prim_flags )
 {
    struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
    struct draw_context *draw = feme->draw;
@@ -334,7 +336,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
                                         unsigned start,
                                         unsigned count,
                                         const ushort *draw_elts,
-                                        unsigned draw_count )
+                                        unsigned draw_count,
+                                        unsigned prim_flags )
 {
    struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
    struct draw_context *draw = feme->draw;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index b8270280b6..c64104dda5 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -197,7 +197,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
 
 static void fse_run_linear( struct draw_pt_middle_end *middle, 
                             unsigned start, 
-                            unsigned count )
+                            unsigned count,
+                            unsigned prim_flags )
 {
    struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
    struct draw_context *draw = fse->draw;
@@ -265,7 +266,8 @@ fse_run(struct draw_pt_middle_end *middle,
         const unsigned *fetch_elts,
         unsigned fetch_count,
         const ushort *draw_elts,
-        unsigned draw_count )
+        unsigned draw_count,
+        unsigned prim_flags )
 {
    struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
    struct draw_context *draw = fse->draw;
@@ -327,7 +329,8 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
                                  unsigned start, 
                                  unsigned count,
                                  const ushort *draw_elts,
-                                 unsigned draw_count )
+                                 unsigned draw_count,
+                                 unsigned prim_flags )
 {
    struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
    struct draw_context *draw = fse->draw;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 92588cd7f8..1ac20d27f3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -295,7 +295,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
                                 const unsigned *fetch_elts,
                                 unsigned fetch_count,
                                 const ushort *draw_elts,
-                                unsigned draw_count )
+                                unsigned draw_count,
+                                unsigned prim_flags )
 {
    struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
    struct draw_fetch_info fetch_info;
@@ -311,7 +312,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
    prim_info.prim = fpme->input_prim;
-   prim_info.flags = 0x0;
+   prim_info.flags = prim_flags;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &draw_count;
 
@@ -321,7 +322,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
 
 static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
                                        unsigned start,
-                                       unsigned count)
+                                       unsigned count,
+                                       unsigned prim_flags)
 {
    struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
    struct draw_fetch_info fetch_info;
@@ -337,7 +339,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
    prim_info.count = count;
    prim_info.elts = NULL;
    prim_info.prim = fpme->input_prim;
-   prim_info.flags = 0x0;
+   prim_info.flags = prim_flags;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &count;
 
@@ -350,7 +352,8 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
                                                unsigned start,
                                                unsigned count,
                                                const ushort *draw_elts,
-                                               unsigned draw_count )
+                                               unsigned draw_count,
+                                               unsigned prim_flags )
 {
    struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
    struct draw_fetch_info fetch_info;
@@ -366,7 +369,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
    prim_info.prim = fpme->input_prim;
-   prim_info.flags = 0x0;
+   prim_info.flags = prim_flags;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &draw_count;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 46701f11b5..8f2847ffa0 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -294,7 +294,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
                                  const unsigned *fetch_elts,
                                  unsigned fetch_count,
                                  const ushort *draw_elts,
-                                 unsigned draw_count )
+                                 unsigned draw_count,
+                                 unsigned prim_flags )
 {
    struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
    struct draw_fetch_info fetch_info;
@@ -310,7 +311,7 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
    prim_info.prim = fpme->input_prim;
-   prim_info.flags = 0x0;
+   prim_info.flags = prim_flags;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &draw_count;
 
@@ -320,7 +321,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
 
 static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
                                        unsigned start,
-                                       unsigned count)
+                                       unsigned count,
+                                       unsigned prim_flags)
 {
    struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
    struct draw_fetch_info fetch_info;
@@ -336,7 +338,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
    prim_info.count = count;
    prim_info.elts = NULL;
    prim_info.prim = fpme->input_prim;
-   prim_info.flags = 0x0;
+   prim_info.flags = prim_flags;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &count;
 
@@ -350,7 +352,8 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
                                  unsigned start,
                                  unsigned count,
                                  const ushort *draw_elts,
-                                 unsigned draw_count )
+                                 unsigned draw_count,
+                                 unsigned prim_flags )
 {
    struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
    struct draw_fetch_info fetch_info;
@@ -366,7 +369,7 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
    prim_info.prim = fpme->input_prim;
-   prim_info.flags = 0x0;
+   prim_info.flags = prim_flags;
    prim_info.primitive_count = 1;
    prim_info.primitive_lengths = &draw_count;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index cd7bb7bf25..2cda4f018d 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -57,7 +57,7 @@ static void varray_flush_linear(struct varray_frontend *varray,
 {
    if (count) {
       assert(varray->middle->run_linear);
-      varray->middle->run_linear(varray->middle, start, count);
+      varray->middle->run_linear(varray->middle, start, count, 0x0);
    }
 }
 
@@ -83,7 +83,8 @@ static void varray_line_loop_segment(struct varray_frontend *varray,
                           varray->fetch_elts,
                           nr,
                           varray->draw_elts, /* ie. linear */
-                          nr);
+                          nr,
+                          0x0);
    }
 }
 
@@ -110,7 +111,8 @@ static void varray_fan_segment(struct varray_frontend *varray,
                           varray->fetch_elts,
                           nr,
                           varray->draw_elts, /* ie. linear */
-                          nr);
+                          nr,
+                          0x0);
    }
 }
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index a848b54f7d..0a9ec7ce6c 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -82,7 +82,8 @@ vcache_flush( struct vcache_frontend *vcache )
                            vcache->fetch_elts,
                            vcache->fetch_count,
                            vcache->draw_elts,
-                           vcache->draw_count );
+                           vcache->draw_count,
+                           0x0 );
    }
 
    memset(vcache->in, ~0, sizeof(vcache->in));
@@ -509,7 +510,7 @@ vcache_check_run( struct draw_pt_front_end *frontend,
                                             min_index + elt_bias, /* start */
                                             fetch_count,
                                             transformed_elts,
-                                            draw_count );
+                                            draw_count, 0x0 );
    
    FREE(storage);
 
-- 
cgit v1.2.3


From 9d2be38fad109d9a10942fddde0b9dc3824c329c Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sun, 18 Jul 2010 16:53:57 +0800
Subject: draw: Simplify frontend interface a little.

The run method is simplified to take the start vertex and the vertex
count.
---
 src/gallium/auxiliary/draw/draw_pt.c                   | 6 +-----
 src/gallium/auxiliary/draw/draw_pt.h                   | 4 +---
 src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 7 +------
 src/gallium/auxiliary/draw/draw_pt_vcache.c            | 8 ++++----
 src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h        | 7 ++++---
 5 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 248927505d..ded94bb575 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -126,11 +126,7 @@ draw_pt_arrays(struct draw_context *draw,
 
    frontend->prepare( frontend, prim, middle, opt );
 
-   frontend->run(frontend,
-                 draw_pt_elt_func(draw),
-                 draw_pt_elt_ptr(draw, start),
-                 draw->pt.user.eltBias,
-                 count);
+   frontend->run(frontend, start, count);
 
    frontend->finish( frontend );
 
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 8d69b8c8c8..42c4f83272 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -67,9 +67,7 @@ struct draw_pt_front_end {
 		    unsigned opt );
 
    void (*run)( struct draw_pt_front_end *,
-                pt_elt_func elt_func,
-                const void *elt_ptr,
-                int elt_bias,
+                unsigned start,
                 unsigned count );
 
    void (*finish)( struct draw_pt_front_end * );
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index 55e43b2a71..fc54476488 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -9,19 +9,14 @@ static unsigned trim( unsigned count, unsigned first, unsigned incr )
 }
 
 static void FUNC(struct draw_pt_front_end *frontend,
-                 pt_elt_func get_elt,
-                 const void *elts,
-                 int elt_bias,
+                 unsigned start,
                  unsigned count)
 {
    struct varray_frontend *varray = (struct varray_frontend *)frontend;
-   unsigned start = (unsigned) ((char *) elts - (char *) NULL);
 
    unsigned j;
    unsigned first, incr;
 
-   assert(elt_bias == 0);
-
    draw_pt_split_prim(varray->input_prim, &first, &incr);
    
    /* Sanitize primitive length:
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 0a9ec7ce6c..993f388dc3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -369,9 +369,7 @@ any_instance_divisors(const struct draw_context *draw)
 
 static INLINE void 
 vcache_check_run( struct draw_pt_front_end *frontend, 
-                  pt_elt_func get_elt,
-                  const void *elts,
-                  int elt_bias,
+                  unsigned draw_start,
                   unsigned draw_count )
 {
    struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; 
@@ -379,10 +377,12 @@ vcache_check_run( struct draw_pt_front_end *frontend,
    const unsigned min_index = draw->pt.user.min_index;
    const unsigned max_index = draw->pt.user.max_index;
    const unsigned index_size = draw->pt.user.eltSize;
+   const int elt_bias = draw->pt.user.eltBias;
    unsigned fetch_count;
    const ushort *transformed_elts;
    ushort *storage = NULL;
    boolean ok = FALSE;
+   const void *elts = draw_pt_elt_ptr(draw, draw_start);
 
    /* debug: verify indexes are in range [min_index, max_index] */
    if (0) {
@@ -521,7 +521,7 @@ vcache_check_run( struct draw_pt_front_end *frontend,
                 fetch_count, draw_count);
 
 fail:
-   vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
+   vcache_run( frontend, draw_start, draw_count );
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
index 8a841e83f2..e80a9c7f15 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -1,14 +1,15 @@
 #define FUNC_VARS                      \
    struct draw_pt_front_end *frontend, \
-   pt_elt_func get_elt,                \
-   const void *elts,                   \
-   int elt_bias,                       \
+   unsigned start,                     \
    unsigned count
 
 #define LOCAL_VARS \
    struct vcache_frontend *vcache = (struct vcache_frontend *) frontend;   \
    struct draw_context *draw = vcache->draw;                               \
    const unsigned prim = vcache->input_prim;                               \
+   const void *elts = draw_pt_elt_ptr(draw, start);                        \
+   pt_elt_func get_elt = draw_pt_elt_func(draw);                           \
+   const int elt_bias = draw->pt.user.eltBias;                             \
    const boolean last_vertex_last = !(draw->rasterizer->flatshade &&       \
                                       draw->rasterizer->flatshade_first);  \
    const unsigned prim_flags = 0x0;
-- 
cgit v1.2.3


From 56213a64fe9e4270fd7886675b1e8224b2d88794 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sat, 7 Aug 2010 14:37:26 +0800
Subject: draw: Add new util function draw_pt_trim_count.

draw_pt_trim_count is renamed from trim in draw_pt.c.
---
 src/gallium/auxiliary/draw/draw_pt.c      | 11 +----------
 src/gallium/auxiliary/draw/draw_pt.h      |  1 +
 src/gallium/auxiliary/draw/draw_pt_util.c |  7 +++++++
 3 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index ded94bb575..2b400eda0f 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -47,15 +47,6 @@ DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
 DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
 #endif
 
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
-   if (count < first)
-      return 0;
-   return count - (count - first) % incr; 
-}
-
-
-
 /* Overall we split things into:
  *     - frontend -- prepare fetch_elts, draw_elts - eg vcache
  *     - middle   -- fetch, shade, cliptest, viewport
@@ -77,7 +68,7 @@ draw_pt_arrays(struct draw_context *draw,
    {
       unsigned first, incr;
       draw_pt_split_prim(prim, &first, &incr);
-      count = trim(count, first, incr);
+      count = draw_pt_trim_count(count, first, incr);
       if (count < first)
          return TRUE;
    }
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 42c4f83272..688b15c4fa 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -240,6 +240,7 @@ void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
  * Utils: 
  */
 void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr);
 
 
 #endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index 182a597cca..513bbbed21 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -92,3 +92,10 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
       break;
    }
 }
+
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr)
+{
+   if (count < first)
+      return 0;
+   return count - (count - first) % incr;
+}
-- 
cgit v1.2.3


From 04bc530dbdbe5d004219c9100e35f5d56cfedd80 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sat, 7 Aug 2010 03:36:18 +0800
Subject: draw: Add vsplit frontend.

vsplit is based on varray.  It sets the split flags when a primitive is
split.  It also has support for indexed primitives.

For indexed primitives, unlike vcache, vsplit splits the primitives
instead of decomposing them.
---
 src/gallium/auxiliary/Makefile                  |   1 +
 src/gallium/auxiliary/SConscript                |   1 +
 src/gallium/auxiliary/draw/draw_pt.h            |  14 +-
 src/gallium/auxiliary/draw/draw_pt_vsplit.c     | 208 ++++++++++++++++
 src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h | 301 ++++++++++++++++++++++++
 src/gallium/auxiliary/draw/draw_split_tmp.h     | 171 ++++++++++++++
 6 files changed, 695 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/auxiliary/draw/draw_pt_vsplit.c
 create mode 100644 src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
 create mode 100644 src/gallium/auxiliary/draw/draw_split_tmp.h

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 9544e90a96..ac3828c513 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -37,6 +37,7 @@ C_SOURCES = \
 	draw/draw_pt_util.c \
 	draw/draw_pt_varray.c \
 	draw/draw_pt_vcache.c \
+	draw/draw_pt_vsplit.c \
 	draw/draw_vertex.c \
 	draw/draw_vs.c \
 	draw/draw_vs_varient.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 3124e20ce8..89d1caf116 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -82,6 +82,7 @@ source = [
     'draw/draw_pt_util.c',
     'draw/draw_pt_varray.c',
     'draw/draw_pt_vcache.c',
+    'draw/draw_pt_vsplit.c',
     'draw/draw_vertex.c',
     'draw/draw_vs.c',
     'draw/draw_vs_aos.c',
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 688b15c4fa..de3f638db5 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -52,8 +52,19 @@ struct draw_vertex_info;
 /* The "front end" - prepare sets of fetch, draw elements for the
  * middle end.
  *
- * Currenly one version of this:
+ * The fetch elements are indices to the vertices.  The draw elements are
+ * indices to the fetched vertices.  When both arrays of elements are
+ * linear, middle->run_linear is called;  when only the fetch elements are
+ * linear, middle->run_linear_elts is called;  otherwise, middle->run is
+ * called.
+ *
+ * When the number of the draw elements exceeds max_vertex of the middle end,
+ * the draw elements (as well as the fetch elements) are split and the
+ * middle end is called multiple times.
+ *
+ * Currently there are:
  *    - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
+ *    - vsplit - catchall implementation, splits big prims
  * Later:
  *    - varray, varray_split
  *    - velement, velement_split
@@ -138,6 +149,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw,
  */
 struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
 struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw);
 
 
 /* Middle-ends:
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
new file mode 100644
index 0000000000..a687525309
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -0,0 +1,208 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+#define SEGMENT_SIZE 1024
+#define MAP_SIZE     256
+
+struct vsplit_frontend {
+   struct draw_pt_front_end base;
+   struct draw_context *draw;
+
+   unsigned prim;
+
+   struct draw_pt_middle_end *middle;
+
+   unsigned max_vertices;
+   ushort segment_size;
+
+   /* buffers for splitting */
+   unsigned fetch_elts[SEGMENT_SIZE];
+   ushort draw_elts[SEGMENT_SIZE];
+   ushort identity_draw_elts[SEGMENT_SIZE];
+
+   struct {
+      /* map a fetch element to a draw element */
+      unsigned fetches[MAP_SIZE];
+      ushort draws[MAP_SIZE];
+      boolean has_max_fetch;
+
+      ushort num_fetch_elts;
+      ushort num_draw_elts;
+   } cache;
+};
+
+
+static void
+vsplit_clear_cache(struct vsplit_frontend *vsplit)
+{
+   memset(vsplit->cache.fetches, 0xff, sizeof(vsplit->cache.fetches));
+   vsplit->cache.has_max_fetch = FALSE;
+   vsplit->cache.num_fetch_elts = 0;
+   vsplit->cache.num_draw_elts = 0;
+}
+
+static void
+vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
+{
+   vsplit->middle->run(vsplit->middle,
+         vsplit->fetch_elts, vsplit->cache.num_fetch_elts,
+         vsplit->draw_elts, vsplit->cache.num_draw_elts, flags);
+}
+
+/**
+ * Add a fetch element and add it to the draw elements.
+ */
+static INLINE void
+vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+   unsigned hash = fetch % MAP_SIZE;
+
+   if (vsplit->cache.fetches[hash] != fetch) {
+      /* update cache */
+      vsplit->cache.fetches[hash] = fetch;
+      vsplit->cache.draws[hash] = vsplit->cache.num_fetch_elts;
+
+      /* add fetch */
+      assert(vsplit->cache.num_fetch_elts < vsplit->segment_size);
+      vsplit->fetch_elts[vsplit->cache.num_fetch_elts++] = fetch;
+   }
+
+   vsplit->draw_elts[vsplit->cache.num_draw_elts++] = vsplit->cache.draws[hash];
+}
+
+
+/**
+ * Add a fetch element and add it to the draw elements.  The fetch element is
+ * in full range (uint).
+ */
+static INLINE void
+vsplit_add_cache_uint(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+   /* special care for 0xffffffff */
+   if (fetch == 0xffffffff && !vsplit->cache.has_max_fetch) {
+      unsigned hash = fetch % MAP_SIZE;
+      vsplit->cache.fetches[hash] = fetch - 1; /* force update */
+      vsplit->cache.has_max_fetch = TRUE;
+   }
+
+   vsplit_add_cache(vsplit, fetch);
+}
+
+
+#define FUNC vsplit_run_linear
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ubyte
+#define ELT_TYPE ubyte
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ushort
+#define ELT_TYPE ushort
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_uint
+#define ELT_TYPE uint
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache_uint(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+
+static void vsplit_prepare(struct draw_pt_front_end *frontend,
+                           unsigned in_prim,
+                           struct draw_pt_middle_end *middle,
+                           unsigned opt)
+{
+   struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+
+   switch (vsplit->draw->pt.user.eltSize) {
+   case 0:
+      vsplit->base.run = vsplit_run_linear;
+      break;
+   case 1:
+      vsplit->base.run = vsplit_run_ubyte;
+      break;
+   case 2:
+      vsplit->base.run = vsplit_run_ushort;
+      break;
+   case 4:
+      vsplit->base.run = vsplit_run_uint;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* split only */
+   vsplit->prim = in_prim;
+
+   vsplit->middle = middle;
+   middle->prepare(middle, vsplit->prim, opt, &vsplit->max_vertices);
+
+   vsplit->segment_size = MIN2(SEGMENT_SIZE, vsplit->max_vertices);
+}
+
+
+static void vsplit_finish(struct draw_pt_front_end *frontend)
+{
+   struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+   vsplit->middle->finish(vsplit->middle);
+   vsplit->middle = NULL;
+}
+
+
+static void vsplit_destroy(struct draw_pt_front_end *frontend)
+{
+   FREE(frontend);
+}
+
+
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw)
+{
+   struct vsplit_frontend *vsplit = CALLOC_STRUCT(vsplit_frontend);
+   ushort i;
+
+   if (!vsplit)
+      return NULL;
+
+   vsplit->base.prepare = vsplit_prepare;
+   vsplit->base.run     = NULL;
+   vsplit->base.finish  = vsplit_finish;
+   vsplit->base.destroy = vsplit_destroy;
+   vsplit->draw = draw;
+
+   for (i = 0; i < SEGMENT_SIZE; i++)
+      vsplit->identity_draw_elts[i] = i;
+
+   return &vsplit->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
new file mode 100644
index 0000000000..efeaa56711
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -0,0 +1,301 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#define CONCAT2(name, elt_type) name ## elt_type
+#define CONCAT(name, elt_type) CONCAT2(name, elt_type)
+
+#ifdef ELT_TYPE
+
+/**
+ * Fast path: fetch all vertices in [min_index, max_index] (plus bias) and use
+ * ib[istart .. istart + icount - 1], rebased to min_index, as draw elements.
+ */
+static boolean
+CONCAT(vsplit_segment_fast_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                       unsigned flags,
+                                       unsigned istart, unsigned icount)
+{
+   struct draw_context *draw = vsplit->draw;
+   const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts;
+   const unsigned min_index = draw->pt.user.min_index;
+   const unsigned max_index = draw->pt.user.max_index;
+   const int elt_bias = draw->pt.user.eltBias;
+   unsigned fetch_start, fetch_count;
+   const ushort *draw_elts;
+   unsigned i;
+
+   assert(icount <= vsplit->segment_size);
+
+   /* this is faster only when we fetch fewer elements than the normal path */
+   if (max_index - min_index > icount - 1)
+      return FALSE;
+
+   if (elt_bias < 0 && min_index < -elt_bias)
+      return FALSE;
+
+   /* why this check? */
+   for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+      if (draw->pt.vertex_element[i].instance_divisor)
+         return FALSE;
+   }
+
+   fetch_start = min_index + elt_bias;
+   fetch_count = max_index - min_index + 1;
+
+   if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
+      for (i = 0; i < icount; i++) {
+         ELT_TYPE idx = ib[istart + i];
+         assert(idx >= min_index && idx <= max_index);
+      }
+      draw_elts = (const ushort *) (ib + istart);
+   }
+   else {
+      if (min_index == 0) {
+         for (i = 0; i < icount; i++) {
+            ELT_TYPE idx = ib[istart + i];
+
+            assert(idx >= min_index && idx <= max_index);
+            vsplit->draw_elts[i] = (ushort) idx;
+         }
+      }
+      else {
+         for (i = 0; i < icount; i++) {
+            ELT_TYPE idx = ib[istart + i];
+
+            assert(idx >= min_index && idx <= max_index);
+            vsplit->draw_elts[i] = (ushort) (idx - min_index);
+         }
+      }
+
+      draw_elts = vsplit->draw_elts;
+   }
+
+   return vsplit->middle->run_linear_elts(vsplit->middle,
+                                          fetch_start, fetch_count,
+                                          draw_elts, icount, flags);
+}
+
+/**
+ * Use the cache to prepare the fetch and draw elements, and flush.
+ *
+ * When spoken is TRUE, ispoken replaces istart;  When close is TRUE, iclose is
+ * appended.
+ */
+static INLINE void
+CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                        unsigned flags,
+                                        unsigned istart, unsigned icount,
+                                        boolean spoken, unsigned ispoken,
+                                        boolean close, unsigned iclose)
+{
+   struct draw_context *draw = vsplit->draw;
+   const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts;
+   const int ibias = draw->pt.user.eltBias;
+   unsigned i;
+
+   assert(icount + !!close <= vsplit->segment_size);
+
+   vsplit_clear_cache(vsplit);
+
+   spoken = !!spoken;
+   if (ibias == 0) {
+      if (spoken)
+         ADD_CACHE(vsplit, ib[ispoken]);
+
+      for (i = spoken; i < icount; i++)
+         ADD_CACHE(vsplit, ib[istart + i]);
+
+      if (close)
+         ADD_CACHE(vsplit, ib[iclose]);
+   }
+   else if (ibias > 0) {
+      if (spoken)
+         ADD_CACHE(vsplit, (uint) ib[ispoken] + ibias);
+
+      for (i = spoken; i < icount; i++)
+         ADD_CACHE(vsplit, (uint) ib[istart + i] + ibias);
+
+      if (close)
+         ADD_CACHE(vsplit, (uint) ib[iclose] + ibias);
+   }
+   else {
+      if (spoken) {
+         if (ib[ispoken] < -ibias)
+            return;
+         ADD_CACHE(vsplit, ib[ispoken] + ibias);
+      }
+
+      for (i = spoken; i < icount; i++) {
+         if (ib[istart + i] < -ibias)
+            return;
+         ADD_CACHE(vsplit, ib[istart + i] + ibias);
+      }
+
+      if (close) {
+         if (ib[iclose] < -ibias)
+            return;
+         ADD_CACHE(vsplit, ib[iclose] + ibias);
+      }
+   }
+
+   vsplit_flush_cache(vsplit, flags);
+}
+
+static void
+CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                         unsigned flags,
+                                         unsigned istart,
+                                         unsigned icount)
+{
+   /* the primitive is not splitted */
+   if (!(flags)) {
+      if (CONCAT(vsplit_segment_fast_, ELT_TYPE)(vsplit,
+               flags, istart, icount))
+         return;
+   }
+   CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+         flags, istart, icount, FALSE, 0, FALSE, 0);
+}
+
+static void
+CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                       unsigned flags,
+                                       unsigned istart,
+                                       unsigned icount,
+                                       unsigned i0)
+{
+   const boolean close_loop = ((flags) == DRAW_SPLIT_BEFORE);
+
+   CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+         flags, istart, icount, FALSE, 0, close_loop, i0);
+}
+
+static void
+CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                      unsigned flags,
+                                      unsigned istart,
+                                      unsigned icount,
+                                      unsigned i0)
+{
+   const boolean use_spoken = (((flags) & DRAW_SPLIT_BEFORE) != 0);
+
+   CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+         flags, istart, icount, use_spoken, i0, FALSE, 0);
+}
+
+#define LOCAL_VARS                                                         \
+   struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;   \
+   const unsigned prim = vsplit->prim;                                     \
+   const unsigned max_count_simple = vsplit->segment_size;                 \
+   const unsigned max_count_loop = vsplit->segment_size - 1;               \
+   const unsigned max_count_fan = vsplit->segment_size;
+
+#else /* ELT_TYPE */
+
+static void
+vsplit_segment_simple_linear(struct vsplit_frontend *vsplit, unsigned flags,
+                             unsigned istart, unsigned icount)
+{
+   assert(icount <= vsplit->max_vertices);
+   vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+}
+
+static void
+vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
+                           unsigned istart, unsigned icount, unsigned i0)
+{
+   boolean close_loop = (flags == DRAW_SPLIT_BEFORE);
+   unsigned nr;
+
+   assert(icount + !!close_loop <= vsplit->segment_size);
+
+   if (close_loop) {
+      for (nr = 0; nr < icount; nr++)
+         vsplit->fetch_elts[nr] = istart + nr;
+      vsplit->fetch_elts[nr++] = i0;
+
+      vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+            vsplit->identity_draw_elts, nr, flags);
+   }
+   else {
+      vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+   }
+}
+
+static void
+vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
+                          unsigned istart, unsigned icount, unsigned i0)
+{
+   boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0);
+   unsigned nr = 0, i;
+
+   assert(icount + !!use_spoken <= vsplit->segment_size);
+
+   if (use_spoken) {
+      vsplit->fetch_elts[nr++] = i0;
+      for (i = 1 ; i < icount; i++)
+         vsplit->fetch_elts[nr++] = istart + i;
+
+      vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+            vsplit->identity_draw_elts, nr, flags);
+   }
+   else {
+      vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+   }
+}
+
+#define LOCAL_VARS                                                         \
+   struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;   \
+   const unsigned prim = vsplit->prim;                                     \
+   const unsigned max_count_simple = vsplit->max_vertices;                 \
+   const unsigned max_count_loop = vsplit->segment_size - 1;               \
+   const unsigned max_count_fan = vsplit->segment_size;
+
+#define ELT_TYPE linear
+
+#endif /* ELT_TYPE */
+
+#define FUNC_VARS                      \
+   struct draw_pt_front_end *frontend, \
+   unsigned start,                     \
+   unsigned count
+
+#define SEGMENT_SIMPLE(flags, istart, icount)   \
+   CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount)
+
+#define SEGMENT_LOOP(flags, istart, icount, i0) \
+   CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#define SEGMENT_FAN(flags, istart, icount, i0)  \
+   CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#include "draw_split_tmp.h"
+
+#undef CONCAT2
+#undef CONCAT
+
+#undef ELT_TYPE
+#undef ADD_CACHE
diff --git a/src/gallium/auxiliary/draw/draw_split_tmp.h b/src/gallium/auxiliary/draw/draw_split_tmp.h
new file mode 100644
index 0000000000..40ab0b71f1
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_split_tmp.h
@@ -0,0 +1,171 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+static void
+FUNC(FUNC_VARS)
+{
+   unsigned first, incr;
+   LOCAL_VARS
+
+   /*
+    * prim, start, count, and max_count_{simple,loop,fan} should have been
+    * defined
+    */
+   if (0) {
+      debug_printf("%s: prim 0x%x, start %d, count %d, max_count_simple %d, "
+                   "max_count_loop %d, max_count_fan %d\n",
+                   __FUNCTION__, prim, start, count, max_count_simple,
+                   max_count_loop, max_count_fan);
+   }
+
+   draw_pt_split_prim(prim, &first, &incr);
+   /* sanitize primitive length */
+   count = draw_pt_trim_count(count, first, incr);
+   if (count < first)
+      return;
+
+   /* must be able to at least flush two complete primitives */
+   assert(max_count_simple >= first + incr &&
+          max_count_loop >= first + incr &&
+          max_count_fan >= first + incr);
+
+   /* no splitting required */
+   if (count <= max_count_simple) {
+      SEGMENT_SIMPLE(0x0, start, count);
+   }
+   else {
+      const unsigned rollback = first - incr;
+      unsigned flags = DRAW_SPLIT_AFTER, seg_start = 0, seg_max;
+
+      /*
+       * Both count and seg_max below are explicitly trimmed.  Because
+       *
+       *   seg_start = N * (seg_max - rollback) = N' * incr,
+       *
+       * we have
+       *
+       *   remaining = count - seg_start = first + N'' * incr.
+       *
+       * That is, remaining is implicitly trimmed.
+       */
+      switch (prim) {
+      case PIPE_PRIM_POINTS:
+      case PIPE_PRIM_LINES:
+      case PIPE_PRIM_LINE_STRIP:
+      case PIPE_PRIM_TRIANGLES:
+      case PIPE_PRIM_TRIANGLE_STRIP:
+      case PIPE_PRIM_QUADS:
+      case PIPE_PRIM_QUAD_STRIP:
+      case PIPE_PRIM_LINES_ADJACENCY:
+      case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+         seg_max =
+            draw_pt_trim_count(MIN2(max_count_simple, count), first, incr);
+         if (prim == PIPE_PRIM_TRIANGLE_STRIP ||
+             prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) {
+            /* make sure we flush even number of triangles at a time */
+            if (seg_max < count && !(((seg_max - first) / incr) & 1))
+               seg_max -= incr;
+         }
+
+         do {
+            const unsigned remaining = count - seg_start;
+
+            if (remaining > seg_max) {
+               SEGMENT_SIMPLE(flags, start + seg_start, seg_max);
+               seg_start += seg_max - rollback;
+
+               flags |= DRAW_SPLIT_BEFORE;
+            }
+            else {
+               flags &= ~DRAW_SPLIT_AFTER;
+
+               SEGMENT_SIMPLE(flags, start + seg_start, remaining);
+               seg_start += remaining;
+            }
+         } while (seg_start < count);
+         break;
+
+      case PIPE_PRIM_LINE_LOOP:
+         seg_max =
+            draw_pt_trim_count(MIN2(max_count_loop, count), first, incr);
+
+         do {
+            const unsigned remaining = count - seg_start;
+
+            if (remaining > seg_max) {
+               SEGMENT_LOOP(flags, start + seg_start, seg_max, start);
+               seg_start += seg_max - rollback;
+
+               flags |= DRAW_SPLIT_BEFORE;
+            }
+            else {
+               flags &= ~DRAW_SPLIT_AFTER;
+
+               SEGMENT_LOOP(flags, start + seg_start, remaining, start);
+               seg_start += remaining;
+            }
+         } while (seg_start < count);
+         break;
+
+      case PIPE_PRIM_TRIANGLE_FAN:
+      case PIPE_PRIM_POLYGON:
+         seg_max =
+            draw_pt_trim_count(MIN2(max_count_fan, count), first, incr);
+
+         do {
+            const unsigned remaining = count - seg_start;
+
+            if (remaining > seg_max) {
+               SEGMENT_FAN(flags, start + seg_start, seg_max, start);
+               seg_start += seg_max - rollback;
+
+               flags |= DRAW_SPLIT_BEFORE;
+            }
+            else {
+               flags &= ~DRAW_SPLIT_AFTER;
+
+               SEGMENT_FAN(flags, start + seg_start, remaining, start);
+               seg_start += remaining;
+            }
+         } while (seg_start < count);
+         break;
+
+      default:
+         assert(0);
+         break;
+      }
+   }
+}
+
+#undef FUNC
+#undef FUNC_VARS
+#undef LOCAL_VARS
+
+#undef SEGMENT_SIMPLE
+#undef SEGMENT_LOOP
+#undef SEGMENT_FAN
-- 
cgit v1.2.3


From 5b6bf799e637e9020af3a4bebe514b53d7c38eca Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sat, 7 Aug 2010 15:12:14 +0800
Subject: draw: Replace varray by vsplit.

vsplit is a superset of varray.  Unlike varray, it also sets the split
flags.
---
 src/gallium/auxiliary/draw/draw_private.h |  2 +-
 src/gallium/auxiliary/draw/draw_pt.c      | 15 ++++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 826f5dc98c..18b632e3d9 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -141,7 +141,7 @@ struct draw_context
 
       struct {
          struct draw_pt_front_end *vcache;
-         struct draw_pt_front_end *varray;
+         struct draw_pt_front_end *vsplit;
       } front;
 
       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 2b400eda0f..b6debbecf5 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -111,8 +111,9 @@ draw_pt_arrays(struct draw_context *draw,
     */
    if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
       frontend = draw->pt.front.vcache;
-   } else {
-      frontend = draw->pt.front.varray;
+   }
+   else {
+      frontend = draw->pt.front.vsplit;
    }
 
    frontend->prepare( frontend, prim, middle, opt );
@@ -134,8 +135,8 @@ boolean draw_pt_init( struct draw_context *draw )
    if (!draw->pt.front.vcache)
       return FALSE;
 
-   draw->pt.front.varray = draw_pt_varray(draw);
-   if (!draw->pt.front.varray)
+   draw->pt.front.vsplit = draw_pt_vsplit(draw);
+   if (!draw->pt.front.vsplit)
       return FALSE;
 
    draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
@@ -186,9 +187,9 @@ void draw_pt_destroy( struct draw_context *draw )
       draw->pt.front.vcache = NULL;
    }
 
-   if (draw->pt.front.varray) {
-      draw->pt.front.varray->destroy( draw->pt.front.varray );
-      draw->pt.front.varray = NULL;
+   if (draw->pt.front.vsplit) {
+      draw->pt.front.vsplit->destroy( draw->pt.front.vsplit );
+      draw->pt.front.vsplit = NULL;
    }
 }
 
-- 
cgit v1.2.3


From 5a085c623faebf957be3fae2f82dc89ef6214585 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sat, 7 Aug 2010 20:44:02 +0800
Subject: draw: Replace vcache by vsplit.

vcache decomposes primitives while vsplit splits primitives.  Splitting
is generally easier to do and is faster.  More importantly, vcache
depends on flatshade_first to decompose.  Its outputs may have an incorrect
vertex order, which is significant to the geometry shader (GS).
---
 src/gallium/auxiliary/draw/draw_pipe.c    |  8 ++------
 src/gallium/auxiliary/draw/draw_private.h |  1 -
 src/gallium/auxiliary/draw/draw_pt.c      | 21 ++-------------------
 3 files changed, 4 insertions(+), 26 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 6a9e4d5e90..43c25167a9 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -169,10 +169,6 @@ static void do_triangle( struct draw_context *draw,
 /*
  * Set up macros for draw_pt_decompose.h template code.
  * This code uses vertex indexes / elements.
- *
- * Flags are needed by the stipple and unfilled stages.  When the two stages
- * are active, vcache_run_extras is called and the flags are stored in the
- * higher bits of i0.  Otherwise, flags do not matter.
  */
 
 #define TRIANGLE(flags,i0,i1,i2)                                  \
@@ -180,7 +176,7 @@ static void do_triangle( struct draw_context *draw,
       assert(!((i1) & DRAW_PIPE_FLAG_MASK));                      \
       assert(!((i2) & DRAW_PIPE_FLAG_MASK));                      \
       do_triangle( draw,                                          \
-                   i0,  /* flags */                               \
+                   flags,                                         \
                    verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK),  \
                    verts + stride * (i1),                         \
                    verts + stride * (i2) );                       \
@@ -190,7 +186,7 @@ static void do_triangle( struct draw_context *draw,
    do {                                                           \
       assert(!((i1) & DRAW_PIPE_FLAG_MASK));                      \
       do_line( draw,                                              \
-               i0, /* flags */                                    \
+               flags,                                             \
                verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK),      \
                verts + stride * (i1) );                           \
    } while (0)
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 18b632e3d9..94b688f891 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -140,7 +140,6 @@ struct draw_context
       } middle;
 
       struct {
-         struct draw_pt_front_end *vcache;
          struct draw_pt_front_end *vsplit;
       } front;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index b6debbecf5..b80fc8f552 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -48,7 +48,7 @@ DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
 #endif
 
 /* Overall we split things into:
- *     - frontend -- prepare fetch_elts, draw_elts - eg vcache
+ *     - frontend -- prepare fetch_elts, draw_elts - eg vsplit
  *     - middle   -- fetch, shade, cliptest, viewport
  *     - pipeline -- the prim pipeline: clipping, wide lines, etc 
  *     - backend  -- the vbuf_render provided by the driver.
@@ -106,15 +106,7 @@ draw_pt_arrays(struct draw_context *draw,
          middle = draw->pt.middle.general;
    }
 
-
-   /* Pick the right frontend
-    */
-   if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
-      frontend = draw->pt.front.vcache;
-   }
-   else {
-      frontend = draw->pt.front.vsplit;
-   }
+   frontend = draw->pt.front.vsplit;
 
    frontend->prepare( frontend, prim, middle, opt );
 
@@ -131,10 +123,6 @@ boolean draw_pt_init( struct draw_context *draw )
    draw->pt.test_fse = debug_get_option_draw_fse();
    draw->pt.no_fse = debug_get_option_draw_no_fse();
 
-   draw->pt.front.vcache = draw_pt_vcache( draw );
-   if (!draw->pt.front.vcache)
-      return FALSE;
-
    draw->pt.front.vsplit = draw_pt_vsplit(draw);
    if (!draw->pt.front.vsplit)
       return FALSE;
@@ -182,11 +170,6 @@ void draw_pt_destroy( struct draw_context *draw )
       draw->pt.middle.fetch_shade_emit = NULL;
    }
 
-   if (draw->pt.front.vcache) {
-      draw->pt.front.vcache->destroy( draw->pt.front.vcache );
-      draw->pt.front.vcache = NULL;
-   }
-
    if (draw->pt.front.vsplit) {
       draw->pt.front.vsplit->destroy( draw->pt.front.vsplit );
       draw->pt.front.vsplit = NULL;
-- 
cgit v1.2.3


From a97419a3ba86fd112a22b5786c4f34f8d8a54f2d Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sat, 7 Aug 2010 15:18:24 +0800
Subject: draw: Remove varray and vcache.

They have been deprecated by vsplit.
---
 src/gallium/auxiliary/Makefile                     |   3 -
 src/gallium/auxiliary/SConscript                   |   3 -
 src/gallium/auxiliary/draw/draw_pt.h               |  21 +-
 src/gallium/auxiliary/draw/draw_pt_elts.c          |  89 ---
 src/gallium/auxiliary/draw/draw_pt_varray.c        | 202 -------
 src/gallium/auxiliary/draw/draw_pt_varray_tmp.h    | 238 --------
 .../auxiliary/draw/draw_pt_varray_tmp_linear.h     |  98 ----
 src/gallium/auxiliary/draw/draw_pt_vcache.c        | 611 ---------------------
 src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h    |  21 -
 9 files changed, 2 insertions(+), 1284 deletions(-)
 delete mode 100644 src/gallium/auxiliary/draw/draw_pt_elts.c
 delete mode 100644 src/gallium/auxiliary/draw/draw_pt_varray.c
 delete mode 100644 src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
 delete mode 100644 src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
 delete mode 100644 src/gallium/auxiliary/draw/draw_pt_vcache.c
 delete mode 100644 src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index ac3828c513..eb2a40cbaa 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -26,7 +26,6 @@ C_SOURCES = \
 	draw/draw_pipe_wide_line.c \
 	draw/draw_pipe_wide_point.c \
 	draw/draw_pt.c \
-	draw/draw_pt_elts.c \
 	draw/draw_pt_emit.c \
 	draw/draw_pt_fetch.c \
 	draw/draw_pt_fetch_emit.c \
@@ -35,8 +34,6 @@ C_SOURCES = \
 	draw/draw_pt_post_vs.c \
 	draw/draw_pt_so_emit.c \
 	draw/draw_pt_util.c \
-	draw/draw_pt_varray.c \
-	draw/draw_pt_vcache.c \
 	draw/draw_pt_vsplit.c \
 	draw/draw_vertex.c \
 	draw/draw_vs.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 89d1caf116..30e5d02c9b 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -71,7 +71,6 @@ source = [
     'draw/draw_pipe_wide_line.c',
     'draw/draw_pipe_wide_point.c',
     'draw/draw_pt.c',
-    'draw/draw_pt_elts.c',
     'draw/draw_pt_emit.c',
     'draw/draw_pt_fetch.c',
     'draw/draw_pt_fetch_emit.c',
@@ -80,8 +79,6 @@ source = [
     'draw/draw_pt_post_vs.c',
     'draw/draw_pt_so_emit.c',
     'draw/draw_pt_util.c',
-    'draw/draw_pt_varray.c',
-    'draw/draw_pt_vcache.c',
     'draw/draw_pt_vsplit.c',
     'draw/draw_vertex.c',
     'draw/draw_vs.c',
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index de3f638db5..0db5666529 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -35,8 +35,6 @@
 
 #include "pipe/p_compiler.h"
 
-typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );
-
 struct draw_pt_middle_end;
 struct draw_context;
 struct draw_prim_info;
@@ -62,14 +60,8 @@ struct draw_vertex_info;
  * the draw elements (as well as the fetch elements) are splitted and the
  * middle end is called multiple times.
  *
- * Currenly there are:
- *    - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
+ * Currently there is:
  *    - vsplit - catchall implementation, splits big prims
- * Later:
- *    - varray, varray_split
- *    - velement, velement_split
- *
- * Currenly only using the vcache version.
  */
 struct draw_pt_front_end {
    void (*prepare)( struct draw_pt_front_end *,
@@ -136,19 +128,10 @@ struct vbuf_render;
 struct vertex_header;
 
 
-/* Helper functions.
- */
-pt_elt_func draw_pt_elt_func( struct draw_context *draw );
-const void *draw_pt_elt_ptr( struct draw_context *draw,
-                             unsigned start );
-
 /* Frontends: 
  *
- * Currently only the general-purpose vcache implementation, could add
- * a special case for tiny vertex buffers.
+ * Currently only the general-purpose vsplit implementation.
  */
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
 struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw);
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
deleted file mode 100644
index 88f4d9f495..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-
-#include "draw/draw_pt.h"
-#include "draw/draw_private.h"
-
-/* Neat get_elt func that also works for varrays drawing by encoding
- * the start value into a pointer.  
- */
-
-static unsigned elt_uint( const void *elts, unsigned idx )
-{
-   return *(((const uint *)elts) + idx);
-}
-
-static unsigned elt_ushort( const void *elts, unsigned idx )
-{
-   return *(((const ushort *)elts) + idx);
-}
-
-static unsigned elt_ubyte( const void *elts, unsigned idx )
-{
-   return *(((const ubyte *)elts) + idx);
-}
-
-static unsigned elt_vert( const void *elts, unsigned idx )
-{
-   /* unsigned index is packed in the pointer */
-   return (unsigned)(uintptr_t)elts + idx;
-}
-
-pt_elt_func draw_pt_elt_func( struct draw_context *draw )
-{
-   switch (draw->pt.user.eltSize) {
-   case 0: return &elt_vert;
-   case 1: return &elt_ubyte;
-   case 2: return &elt_ushort; 
-   case 4: return &elt_uint;
-   default: return NULL;
-   }
-}     
-
-const void *draw_pt_elt_ptr( struct draw_context *draw,
-                             unsigned start )
-{
-   const char *elts = draw->pt.user.elts;
-
-   switch (draw->pt.user.eltSize) {
-   case 0: 
-      return (const void *)(((const ubyte *)NULL) + start);
-   case 1: 
-      return (const void *)(((const ubyte *)elts) + start);
-   case 2: 
-      return (const void *)(((const ushort *)elts) + start);
-   case 4: 
-      return (const void *)(((const uint *)elts) + start);
-   default:
-      return NULL;
-   }
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
deleted file mode 100644
index 2cda4f018d..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-#define FETCH_MAX 256
-#define DRAW_MAX (FETCH_MAX+8)
-
-struct varray_frontend {
-   struct draw_pt_front_end base;
-   struct draw_context *draw;
-
-   ushort draw_elts[DRAW_MAX];
-   unsigned fetch_elts[FETCH_MAX];
-
-   unsigned driver_fetch_max;
-   unsigned fetch_max;
-
-   struct draw_pt_middle_end *middle;
-
-   unsigned input_prim;
-   unsigned output_prim;
-};
-
-
-static void varray_flush_linear(struct varray_frontend *varray,
-                                unsigned start, unsigned count)
-{
-   if (count) {
-      assert(varray->middle->run_linear);
-      varray->middle->run_linear(varray->middle, start, count, 0x0);
-   }
-}
-
-static void varray_line_loop_segment(struct varray_frontend *varray,
-                                     unsigned start,
-                                     unsigned segment_start,
-                                     unsigned segment_count,
-                                     boolean end )
-{
-   assert(segment_count < varray->fetch_max);
-   if (segment_count >= 1) {
-      unsigned nr = 0, i;
-
-      for (i = 0; i < segment_count; i++) 
-         varray->fetch_elts[nr++] = start + segment_start + i;
-
-      if (end) 
-         varray->fetch_elts[nr++] = start;
-
-      assert(nr <= FETCH_MAX);
-
-      varray->middle->run(varray->middle, 
-                          varray->fetch_elts,
-                          nr,
-                          varray->draw_elts, /* ie. linear */
-                          nr,
-                          0x0);
-   }
-}
-
-
-
-static void varray_fan_segment(struct varray_frontend *varray,
-                               unsigned start, 
-                               unsigned segment_start,
-                               unsigned segment_count )
-{
-   assert(segment_count < varray->fetch_max);
-   if (segment_count >= 2) {
-      unsigned nr = 0, i;
-
-      if (segment_start != 0)
-         varray->fetch_elts[nr++] = start;
-
-      for (i = 0 ; i < segment_count; i++) 
-         varray->fetch_elts[nr++] = start + segment_start + i;
-
-      assert(nr <= FETCH_MAX);
-
-      varray->middle->run(varray->middle, 
-                          varray->fetch_elts,
-                          nr,
-                          varray->draw_elts, /* ie. linear */
-                          nr,
-                          0x0);
-   }
-}
-
-
-
-
-#define FUNC varray_run
-#include "draw_pt_varray_tmp_linear.h"
-
-static unsigned decompose_prim[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY + 1] = {
-   PIPE_PRIM_POINTS,
-   PIPE_PRIM_LINES,
-   PIPE_PRIM_LINE_STRIP,        /* decomposed LINELOOP */
-   PIPE_PRIM_LINE_STRIP,
-   PIPE_PRIM_TRIANGLES,
-   PIPE_PRIM_TRIANGLE_STRIP,
-   PIPE_PRIM_TRIANGLE_FAN,
-   PIPE_PRIM_QUADS,
-   PIPE_PRIM_QUAD_STRIP,
-   PIPE_PRIM_POLYGON,
-   PIPE_PRIM_LINES_ADJACENCY,
-   PIPE_PRIM_LINE_STRIP_ADJACENCY,
-   PIPE_PRIM_TRIANGLES_ADJACENCY,
-   PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
-};
-
-
-
-static void varray_prepare(struct draw_pt_front_end *frontend,
-                           unsigned in_prim,
-                           struct draw_pt_middle_end *middle,
-                           unsigned opt)
-{
-   struct varray_frontend *varray = (struct varray_frontend *)frontend;
-
-   varray->base.run = varray_run;
-
-   varray->input_prim = in_prim;
-   assert(in_prim < Elements(decompose_prim));
-   varray->output_prim = decompose_prim[in_prim];
-
-   varray->middle = middle;
-   middle->prepare(middle,
-                   varray->output_prim,
-                   opt, &varray->driver_fetch_max );
-
-   /* check that the max is even */
-   assert((varray->driver_fetch_max & 1) == 0);
-
-   varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max);
-}
-
-
-
-
-static void varray_finish(struct draw_pt_front_end *frontend)
-{
-   struct varray_frontend *varray = (struct varray_frontend *)frontend;
-   varray->middle->finish(varray->middle);
-   varray->middle = NULL;
-}
-
-static void varray_destroy(struct draw_pt_front_end *frontend)
-{
-   FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
-{
-   ushort i;
-   struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
-   if (varray == NULL)
-      return NULL;
-
-   varray->base.prepare = varray_prepare;
-   varray->base.run     = NULL;
-   varray->base.finish  = varray_finish;
-   varray->base.destroy = varray_destroy;
-   varray->draw = draw;
-
-   for (i = 0; i < DRAW_MAX; i++) {
-      varray->draw_elts[i] = i;
-   }
-
-   return &varray->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
deleted file mode 100644
index 7c722457c3..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ /dev/null
@@ -1,238 +0,0 @@
-
-static void FUNC(struct draw_pt_front_end *frontend,
-                 pt_elt_func get_elt,
-                 const void *elts,
-                 unsigned count)
-{
-   struct varray_frontend *varray = (struct varray_frontend *)frontend;
-   struct draw_context *draw = varray->draw;
-   unsigned start = (unsigned)elts;
-
-   boolean flatfirst = (draw->rasterizer->flatshade &&
-                        draw->rasterizer->flatshade_first);
-   unsigned i, j;
-   ushort flags;
-   unsigned first, incr;
-
-   varray->fetch_start = start;
-
-   draw_pt_split_prim(varray->input_prim, &first, &incr);
-
-#if 0
-   debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
-                varray->input_prim,
-                start, count);
-#endif
-
-   switch (varray->input_prim) {
-   case PIPE_PRIM_POINTS:
-      for (j = 0; j + first <= count; j += i) {
-         unsigned end = MIN2(FETCH_MAX, count - j);
-         end -= (end % incr);
-         for (i = 0; i < end; i++) {
-            POINT(varray, i + 0);
-         }
-         i = end;
-         fetch_init(varray, end);
-         varray_flush(varray);
-      }
-      break;
-
-   case PIPE_PRIM_LINES:
-      for (j = 0; j + first <= count; j += i) {
-         unsigned end = MIN2(FETCH_MAX, count - j);
-         end -= (end % incr);
-         for (i = 0; i+1 < end; i += 2) {
-            LINE(varray, DRAW_PIPE_RESET_STIPPLE,
-                 i + 0, i + 1);
-         }
-         i = end;
-         fetch_init(varray, end);
-         varray_flush(varray);
-      }
-      break;
-
-   case PIPE_PRIM_LINE_LOOP:
-      if (count >= 2) {
-         flags = DRAW_PIPE_RESET_STIPPLE;
-
-         for (j = 0; j + first <= count; j += i) {
-            unsigned end = MIN2(FETCH_MAX, count - j);
-            end -= (end % incr);
-            for (i = 1; i < end; i++, flags = 0) {
-               LINE(varray, flags, i - 1, i);
-            }
-            LINE(varray, flags, i - 1, 0);
-            i = end;
-            fetch_init(varray, end);
-            varray_flush(varray);
-         }
-      }
-      break;
-
-   case PIPE_PRIM_LINE_STRIP:
-      flags = DRAW_PIPE_RESET_STIPPLE;
-      for (j = 0; j + first <= count; j += i) {
-         unsigned end = MIN2(FETCH_MAX, count - j);
-         end -= (end % incr);
-         for (i = 1; i < end; i++, flags = 0) {
-            LINE(varray, flags, i - 1, i);
-         }
-         i = end;
-         fetch_init(varray, end);
-         varray_flush(varray);
-      }
-      break;
-
-   case PIPE_PRIM_TRIANGLES:
-      for (j = 0; j + first <= count; j += i) {
-         unsigned end = MIN2(FETCH_MAX, count - j);
-         end -= (end % incr);
-         for (i = 0; i+2 < end; i += 3) {
-            TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
-                     i + 0, i + 1, i + 2);
-         }
-         i = end;
-         fetch_init(varray, end);
-         varray_flush(varray);
-      }
-      break;
-
-   case PIPE_PRIM_TRIANGLE_STRIP:
-      if (flatfirst) {
-         for (j = 0; j + first <= count; j += i) {
-            unsigned end = MIN2(FETCH_MAX, count - j);
-            end -= (end % incr);
-            for (i = 0; i+2 < end; i++) {
-               TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
-                        i + 0, i + 1 + (i&1), i + 2 - (i&1));
-            }
-            i = end;
-            fetch_init(varray, end);
-            varray_flush(varray);
-            if (j + first + i <= count) {
-               varray->fetch_start -= 2;
-               i -= 2;
-            }
-         }
-      }
-      else {
-         for (j = 0; j + first <= count; j += i) {
-            unsigned end = MIN2(FETCH_MAX, count - j);
-            end -= (end  % incr);
-            for (i = 0; i + 2 < end; i++) {
-               TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
-                        i + 0 + (i&1), i + 1 - (i&1), i + 2);
-            }
-            i = end;
-            fetch_init(varray, end);
-            varray_flush(varray);
-            if (j + first + i <= count) {
-               varray->fetch_start -= 2;
-               i -= 2;
-            }
-         }
-      }
-      break;
-
-   case PIPE_PRIM_TRIANGLE_FAN:
-      if (count >= 3) {
-         if (flatfirst) {
-            flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
-            for (j = 0; j + first <= count; j += i) {
-               unsigned end = MIN2(FETCH_MAX, count - j);
-               end -= (end % incr);
-               for (i = 0; i+2 < end; i++) {
-                  TRIANGLE(varray, flags, i + 1, i + 2, 0);
-               }
-               i = end;
-               fetch_init(varray, end);
-               varray_flush(varray);
-            }
-         }
-         else {
-            flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
-            for (j = 0; j + first <= count; j += i) {
-               unsigned end = MIN2(FETCH_MAX, count - j);
-               end -= (end % incr);
-               for (i = 0; i+2 < end; i++) {
-                  TRIANGLE(varray, flags, 0, i + 1, i + 2);
-               }
-               i = end;
-               fetch_init(varray, end);
-               varray_flush(varray);
-            }
-         }
-      }
-      break;
-
-   case PIPE_PRIM_QUADS:
-      for (j = 0; j + first <= count; j += i) {
-         unsigned end = MIN2(FETCH_MAX, count - j);
-         end -= (end % incr);
-         for (i = 0; i+3 < end; i += 4) {
-            QUAD(varray, i + 0, i + 1, i + 2, i + 3);
-         }
-         i = end;
-         fetch_init(varray, end);
-         varray_flush(varray);
-      }
-      break;
-
-   case PIPE_PRIM_QUAD_STRIP:
-      for (j = 0; j + first <= count; j += i) {
-         unsigned end = MIN2(FETCH_MAX, count - j);
-         end -= (end % incr);
-         for (i = 0; i+3 < end; i += 2) {
-            QUAD(varray, i + 2, i + 0, i + 1, i + 3);
-         }
-         i = end;
-         fetch_init(varray, end);
-         varray_flush(varray);
-         if (j + first + i <= count) {
-            varray->fetch_start -= 2;
-            i -= 2;
-         }
-      }
-      break;
-
-   case PIPE_PRIM_POLYGON:
-   {
-      /* These bitflags look a little odd because we submit the
-       * vertices as (1,2,0) to satisfy flatshade requirements.
-       */
-      const ushort edge_first  = DRAW_PIPE_EDGE_FLAG_2;
-      const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
-      const ushort edge_last   = DRAW_PIPE_EDGE_FLAG_1;
-
-      flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
-      for (j = 0; j + first <= count; j += i) {
-         unsigned end = MIN2(FETCH_MAX, count - j);
-         end -= (end % incr);
-         for (i = 0; i+2 < end; i++, flags = edge_middle) {
-
-            if (i + 3 == count)
-               flags |= edge_last;
-
-            TRIANGLE(varray, flags, i + 1, i + 2, 0);
-         }
-         i = end;
-         fetch_init(varray, end);
-         varray_flush(varray);
-      }
-   }
-   break;
-
-   default:
-      assert(0);
-      break;
-   }
-
-   varray_flush(varray);
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
deleted file mode 100644
index fc54476488..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ /dev/null
@@ -1,98 +0,0 @@
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
-   /*
-    * count either has been trimmed in draw_pt_arrays or is set to
-    * (driver)_fetch_max which is hopefully always larger than first.
-    */
-   assert(count >= first);
-   return count - (count - first) % incr;
-}
-
-static void FUNC(struct draw_pt_front_end *frontend,
-                 unsigned start,
-                 unsigned count)
-{
-   struct varray_frontend *varray = (struct varray_frontend *)frontend;
-
-   unsigned j;
-   unsigned first, incr;
-
-   draw_pt_split_prim(varray->input_prim, &first, &incr);
-   
-   /* Sanitize primitive length:
-    */
-   count = trim(count, first, incr); 
-   if (count < first)
-      return;
-
-#if 0
-   debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
-                varray->input_prim,
-                start, count);
-#endif
-
-   switch (varray->input_prim) {
-   case PIPE_PRIM_POINTS:
-   case PIPE_PRIM_LINES:
-   case PIPE_PRIM_TRIANGLES:
-   case PIPE_PRIM_LINE_STRIP:
-   case PIPE_PRIM_TRIANGLE_STRIP:
-   case PIPE_PRIM_QUADS:
-   case PIPE_PRIM_QUAD_STRIP:
-   case PIPE_PRIM_LINES_ADJACENCY:
-   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
-   case PIPE_PRIM_TRIANGLES_ADJACENCY:
-   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
-      for (j = 0; j < count;) {
-         unsigned remaining = count - j;
-         unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
-         varray_flush_linear(varray, start + j, nr);
-         j += nr;
-         if (nr != remaining) 
-            j -= (first - incr);
-      }
-      break;
-
-   case PIPE_PRIM_LINE_LOOP:
-      /* Always have to decompose as we've stated that this will be
-       * emitted as a line-strip.
-       */
-      for (j = 0; j < count;) {
-         unsigned remaining = count - j;
-         unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
-         varray_line_loop_segment(varray, start, j, nr, nr == remaining);
-         j += nr;
-         if (nr != remaining) 
-            j -= (first - incr);
-      }
-      break;
-
-
-   case PIPE_PRIM_POLYGON:
-   case PIPE_PRIM_TRIANGLE_FAN: 
-      if (count < varray->driver_fetch_max) {
-         varray_flush_linear(varray, start, count);
-      }
-      else {
-         for ( j = 0; j < count;) {
-            unsigned remaining = count - j;
-            unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
-            varray_fan_segment(varray, start, j, nr);
-            j += nr;
-            if (nr != remaining) 
-               j -= (first - incr);
-         }
-      }
-      break;
-
-   default:
-      assert(0);
-      break;
-   }
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
deleted file mode 100644
index 993f388dc3..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ /dev/null
@@ -1,611 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-
-#include "util/u_memory.h"
-#include "util/u_prim.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-
-#define CACHE_MAX 256
-#define FETCH_MAX 256
-#define DRAW_MAX (16*1024)
-
-
-struct vcache_frontend {
-   struct draw_pt_front_end base;
-   struct draw_context *draw;
-
-   unsigned in[CACHE_MAX];
-   ushort out[CACHE_MAX];
-
-   ushort draw_elts[DRAW_MAX];
-   unsigned fetch_elts[FETCH_MAX];
-
-   unsigned draw_count;
-   unsigned fetch_count;
-   unsigned fetch_max;
-
-   struct draw_pt_middle_end *middle;
-
-   unsigned input_prim;
-   unsigned output_prim;
-
-   unsigned middle_prim;
-   unsigned opt;
-};
-
-
-static INLINE void
-vcache_flush( struct vcache_frontend *vcache )
-{
-   if (vcache->middle_prim != vcache->output_prim) {
-      vcache->middle_prim = vcache->output_prim;
-      vcache->middle->prepare( vcache->middle,
-                               vcache->middle_prim,
-                               vcache->opt,
-                               &vcache->fetch_max );
-   }
-
-   if (vcache->draw_count) {
-      vcache->middle->run( vcache->middle,
-                           vcache->fetch_elts,
-                           vcache->fetch_count,
-                           vcache->draw_elts,
-                           vcache->draw_count,
-                           0x0 );
-   }
-
-   memset(vcache->in, ~0, sizeof(vcache->in));
-   vcache->fetch_count = 0;
-   vcache->draw_count = 0;
-}
-
-
-static INLINE void 
-vcache_check_flush( struct vcache_frontend *vcache )
-{
-   if (vcache->draw_count + 6 >= DRAW_MAX ||
-       vcache->fetch_count + 6 >= FETCH_MAX) {
-      vcache_flush( vcache );
-   }
-}
-
-
-static INLINE void 
-vcache_elt( struct vcache_frontend *vcache,
-            unsigned felt,
-            ushort flags )
-{
-   unsigned idx = felt % CACHE_MAX;
-
-   if (vcache->in[idx] != felt) {
-      assert(vcache->fetch_count < FETCH_MAX);
-
-      vcache->in[idx] = felt;
-      vcache->out[idx] = (ushort)vcache->fetch_count;
-      vcache->fetch_elts[vcache->fetch_count++] = felt;
-   }
-
-   vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
-}
-
-
-                   
-static INLINE void 
-vcache_triangle( struct vcache_frontend *vcache,
-                 unsigned i0,
-                 unsigned i1,
-                 unsigned i2 )
-{
-   vcache_elt(vcache, i0, 0);
-   vcache_elt(vcache, i1, 0);
-   vcache_elt(vcache, i2, 0);
-   vcache_check_flush(vcache);
-}
-
-			  
-static INLINE void 
-vcache_triangle_flags( struct vcache_frontend *vcache,
-                       ushort flags,
-                       unsigned i0,
-                       unsigned i1,
-                       unsigned i2 )
-{
-   vcache_elt(vcache, i0, flags);
-   vcache_elt(vcache, i1, 0);
-   vcache_elt(vcache, i2, 0);
-   vcache_check_flush(vcache);
-}
-
-
-static INLINE void 
-vcache_line( struct vcache_frontend *vcache,
-             unsigned i0,
-             unsigned i1 )
-{
-   vcache_elt(vcache, i0, 0);
-   vcache_elt(vcache, i1, 0);
-   vcache_check_flush(vcache);
-}
-
-
-static INLINE void 
-vcache_line_flags( struct vcache_frontend *vcache,
-                   ushort flags,
-                   unsigned i0,
-                   unsigned i1 )
-{
-   vcache_elt(vcache, i0, flags);
-   vcache_elt(vcache, i1, 0);
-   vcache_check_flush(vcache);
-}
-
-
-static INLINE void 
-vcache_point( struct vcache_frontend *vcache,
-              unsigned i0 )
-{
-   vcache_elt(vcache, i0, 0);
-   vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_adj_flags( struct vcache_frontend *vcache,
-                       unsigned flags,
-                       unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
-{
-   vcache_elt(vcache, a0, 0);
-   vcache_elt(vcache, i0, flags);
-   vcache_elt(vcache, i1, 0);
-   vcache_elt(vcache, a1, 0);
-   vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_adj( struct vcache_frontend *vcache,
-                 unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
-{
-   vcache_elt(vcache, a0, 0);
-   vcache_elt(vcache, i0, 0);
-   vcache_elt(vcache, i1, 0);
-   vcache_elt(vcache, a1, 0);
-   vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_adj_flags( struct vcache_frontend *vcache,
-                           unsigned flags,
-                           unsigned i0, unsigned a0,
-                           unsigned i1, unsigned a1,
-                           unsigned i2, unsigned a2 )
-{
-   vcache_elt(vcache, i0, flags);
-   vcache_elt(vcache, a0, 0);
-   vcache_elt(vcache, i1, 0);
-   vcache_elt(vcache, a1, 0);
-   vcache_elt(vcache, i2, 0);
-   vcache_elt(vcache, a2, 0);
-   vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_adj( struct vcache_frontend *vcache,
-                     unsigned i0, unsigned a0,
-                     unsigned i1, unsigned a1,
-                     unsigned i2, unsigned a2 )
-{
-   vcache_elt(vcache, i0, 0);
-   vcache_elt(vcache, a0, 0);
-   vcache_elt(vcache, i1, 0);
-   vcache_elt(vcache, a1, 0);
-   vcache_elt(vcache, i2, 0);
-   vcache_elt(vcache, a2, 0);
-   vcache_check_flush(vcache);
-}
-
-
-/* At least for now, we're back to using a template include file for
- * this.  The two paths aren't too different though - it may be
- * possible to reunify them.
- */
-#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2)
-#define LINE(flags,i0,i1)        vcache_line_flags(vcache,flags,i0,i1)
-#define POINT(i0)                vcache_point(vcache,i0)
-#define LINE_ADJ(flags,a0,i0,i1,a1) \
-   vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1)
-#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
-   vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2)
-#define FUNC vcache_run_extras
-#include "draw_pt_vcache_tmp.h"
-
-#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2)
-#define LINE(flags,i0,i1)        vcache_line(vcache,i0,i1)
-#define POINT(i0)                vcache_point(vcache,i0)
-#define LINE_ADJ(flags,a0,i0,i1,a1) \
-   vcache_line_adj(vcache,a0,i0,i1,a1)
-#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
-   vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2)
-#define FUNC vcache_run
-#include "draw_pt_vcache_tmp.h"
-
-static INLINE void 
-rebase_uint_elts( const unsigned *src,
-                  unsigned count,
-                  int delta,
-                  ushort *dest )
-{
-   unsigned i;
-   for (i = 0; i < count; i++) 
-      dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void 
-rebase_ushort_elts( const ushort *src,
-                    unsigned count,
-                    int delta,
-                    ushort *dest )
-{
-   unsigned i;
-   for (i = 0; i < count; i++) 
-      dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void 
-rebase_ubyte_elts( const ubyte *src,
-                   unsigned count,
-                   int delta,
-                   ushort *dest )
-{
-   unsigned i;
-   for (i = 0; i < count; i++) 
-      dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void 
-translate_uint_elts( const unsigned *src,
-                     unsigned count,
-                     ushort *dest )
-{
-   unsigned i;
-   for (i = 0; i < count; i++) 
-      dest[i] = (ushort)(src[i]);
-}
-
-
-static INLINE void 
-translate_ushort_elts( const ushort *src,
-                       unsigned count,
-                       ushort *dest )
-{
-   unsigned i;
-   for (i = 0; i < count; i++) 
-      dest[i] = (ushort)(src[i]);
-}
-
-
-static INLINE void 
-translate_ubyte_elts( const ubyte *src,
-                      unsigned count,
-                      ushort *dest )
-{
-   unsigned i;
-   for (i = 0; i < count; i++) 
-      dest[i] = (ushort)(src[i]);
-}
-
-
-
-
-#if 0
-static INLINE enum pipe_format 
-format_from_get_elt( pt_elt_func get_elt )
-{
-   switch (draw->pt.user.eltSize) {
-   case 1: return PIPE_FORMAT_R8_UNORM;
-   case 2: return PIPE_FORMAT_R16_UNORM;
-   case 4: return PIPE_FORMAT_R32_UNORM;
-   default: return PIPE_FORMAT_NONE;
-   }
-}
-#endif
-
-
-/**
- * Check if any vertex attributes use instance divisors.
- * Note that instance divisors complicate vertex fetching so we need
- * to take the vcache path when they're in use.
- */
-static boolean
-any_instance_divisors(const struct draw_context *draw)
-{
-   uint i;
-
-   for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
-      uint div = draw->pt.vertex_element[i].instance_divisor;
-      if (div)
-         return TRUE;
-   }
-   return FALSE;
-}
-
-
-static INLINE void 
-vcache_check_run( struct draw_pt_front_end *frontend, 
-                  unsigned draw_start,
-                  unsigned draw_count )
-{
-   struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; 
-   struct draw_context *draw = vcache->draw;
-   const unsigned min_index = draw->pt.user.min_index;
-   const unsigned max_index = draw->pt.user.max_index;
-   const unsigned index_size = draw->pt.user.eltSize;
-   const int elt_bias = draw->pt.user.eltBias;
-   unsigned fetch_count;
-   const ushort *transformed_elts;
-   ushort *storage = NULL;
-   boolean ok = FALSE;
-   const void *elts = draw_pt_elt_ptr(draw, draw_start);
-
-   /* debug: verify indexes are in range [min_index, max_index] */
-   if (0) {
-      unsigned i;
-      for (i = 0; i < draw_count; i++) {
-         if (index_size == 1) {
-            assert( ((const ubyte *) elts)[i] >= min_index);
-            assert( ((const ubyte *) elts)[i] <= max_index);
-         }
-         else if (index_size == 2) {
-            assert( ((const ushort *) elts)[i] >= min_index);
-            assert( ((const ushort *) elts)[i] <= max_index);
-         }
-         else {
-            assert(index_size == 4);
-            assert( ((const uint *) elts)[i] >= min_index);
-            assert( ((const uint *) elts)[i] <= max_index);
-         }
-      }
-   }
-
-   /* Note: max_index is frequently 0xffffffff so we have to be sure
-    * that any arithmetic involving max_index doesn't overflow!
-    */
-   if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES)
-      goto fail;
-
-   if (any_instance_divisors(draw))
-      goto fail;
-
-   fetch_count = max_index + 1 - min_index;
-
-   if (0)
-      debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, 
-                   vcache->fetch_max,
-                   draw_count);
-
-   if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
-       fetch_count >= UNDEFINED_VERTEX_ID ||
-       fetch_count > draw_count) {
-      if (0) debug_printf("fail\n");
-      goto fail;
-   }
-
-   if (vcache->middle_prim != vcache->input_prim) {
-      vcache->middle_prim = vcache->input_prim;
-      vcache->middle->prepare( vcache->middle,
-                               vcache->middle_prim,
-                               vcache->opt,
-                               &vcache->fetch_max );
-   }
-
-   assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
-          (elt_bias <  0 && min_index + elt_bias <  min_index));
-
-   if (min_index == 0 &&
-       index_size == 2) {
-      transformed_elts = (const ushort *)elts;
-   }
-   else {
-      storage = MALLOC( draw_count * sizeof(ushort) );
-      if (!storage)
-         goto fail;
-      
-      if (min_index == 0) {
-         switch(index_size) {
-         case 1:
-            translate_ubyte_elts( (const ubyte *)elts,
-                                  draw_count,
-                                  storage );
-            break;
-
-         case 2:
-            translate_ushort_elts( (const ushort *)elts,
-                                   draw_count,
-                                   storage );
-            break;
-
-         case 4:
-            translate_uint_elts( (const uint *)elts,
-                                 draw_count,
-                                 storage );
-            break;
-
-         default:
-            assert(0);
-            FREE(storage);
-            return;
-         }
-      }
-      else {
-         switch(index_size) {
-         case 1:
-            rebase_ubyte_elts( (const ubyte *)elts,
-                               draw_count,
-                               0 - (int)min_index,
-                               storage );
-            break;
-
-         case 2:
-            rebase_ushort_elts( (const ushort *)elts,
-                                draw_count,
-                                0 - (int)min_index,
-                                storage );
-            break;
-
-         case 4:
-            rebase_uint_elts( (const uint *)elts,
-                              draw_count,
-                              0 - (int)min_index,
-                              storage );
-            break;
-
-         default:
-            assert(0);
-            FREE(storage);
-            return;
-         }
-      }
-      transformed_elts = storage;
-   }
-
-   if (fetch_count < UNDEFINED_VERTEX_ID)
-      ok = vcache->middle->run_linear_elts( vcache->middle,
-                                            min_index + elt_bias, /* start */
-                                            fetch_count,
-                                            transformed_elts,
-                                            draw_count, 0x0 );
-   
-   FREE(storage);
-
-   if (ok)
-      return;
-
-   debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
-                fetch_count, draw_count);
-
-fail:
-   vcache_run( frontend, draw_start, draw_count );
-}
-
-
-
-
-static void
-vcache_prepare( struct draw_pt_front_end *frontend,
-                unsigned in_prim,
-                struct draw_pt_middle_end *middle,
-                unsigned opt )
-{
-   struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
-
-   if (opt & PT_PIPELINE) {
-      vcache->base.run = vcache_run_extras;
-   }
-   else {
-      vcache->base.run = vcache_check_run;
-   }
-
-   /* VCache will always emit the reduced version of its input
-    * primitive, ie STRIP/FANS become TRIS, etc.
-    *
-    * This is not to be confused with what the GS might be up to,
-    * which is a separate issue.
-    */
-   vcache->input_prim = in_prim;
-   switch (in_prim) {
-   case PIPE_PRIM_LINES_ADJACENCY:
-   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
-      vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY;
-      break;
-   case PIPE_PRIM_TRIANGLES_ADJACENCY:
-   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
-      vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY;
-      break;
-   default:
-      vcache->output_prim = u_reduced_prim(in_prim);
-   }
-
-   vcache->middle = middle;
-   vcache->opt = opt;
-
-   /* Have to run prepare here, but try and guess a good prim for
-    * doing so:
-    */
-   vcache->middle_prim = (opt & PT_PIPELINE)
-      ? vcache->output_prim : vcache->input_prim;
-
-   middle->prepare( middle,
-                    vcache->middle_prim,
-                    opt, &vcache->fetch_max );
-}
-
-
-static void 
-vcache_finish( struct draw_pt_front_end *frontend )
-{
-   struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
-   vcache->middle->finish( vcache->middle );
-   vcache->middle = NULL;
-}
-
-
-static void 
-vcache_destroy( struct draw_pt_front_end *frontend )
-{
-   FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
-{
-   struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
-   if (vcache == NULL)
-      return NULL;
- 
-   vcache->base.prepare = vcache_prepare;
-   vcache->base.run     = NULL;
-   vcache->base.finish  = vcache_finish;
-   vcache->base.destroy = vcache_destroy;
-   vcache->draw = draw;
-   
-   memset(vcache->in, ~0, sizeof(vcache->in));
-  
-   return &vcache->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
deleted file mode 100644
index e80a9c7f15..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#define FUNC_VARS                      \
-   struct draw_pt_front_end *frontend, \
-   unsigned start,                     \
-   unsigned count
-
-#define LOCAL_VARS \
-   struct vcache_frontend *vcache = (struct vcache_frontend *) frontend;   \
-   struct draw_context *draw = vcache->draw;                               \
-   const unsigned prim = vcache->input_prim;                               \
-   const void *elts = draw_pt_elt_ptr(draw, start);                        \
-   pt_elt_func get_elt = draw_pt_elt_func(draw);                           \
-   const int elt_bias = draw->pt.user.eltBias;                             \
-   const boolean last_vertex_last = !(draw->rasterizer->flatshade &&       \
-                                      draw->rasterizer->flatshade_first);  \
-   const unsigned prim_flags = 0x0;
-
-#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias)
-
-#define FUNC_EXIT do { vcache_flush(vcache); } while (0)
-
-#include "draw_decompose_tmp.h"
-- 
cgit v1.2.3


From 7b3beb22405ee2de0cf02951b6547964a2989ee5 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Tue, 10 Aug 2010 00:39:23 +0800
Subject: draw: last_vertex_last is always true for GS and SO.

That is, the OpenGL decomposition rule is assumed.  There should be a
pipe_context state to specify the rules.
---
 src/gallium/auxiliary/draw/draw_gs_tmp.h      | 7 ++-----
 src/gallium/auxiliary/draw/draw_so_emit_tmp.h | 5 +----
 2 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h
index 7c8a9f9cfc..de7b02655a 100644
--- a/src/gallium/auxiliary/draw/draw_gs_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h
@@ -6,13 +6,10 @@
 
 #define FUNC_ENTER                                                \
    /* declare more local vars */                                  \
-   struct draw_context *draw = gs->draw;                          \
    const unsigned prim = input_prims->prim;                       \
-   const unsigned count = input_prims->count;                     \
-   const boolean last_vertex_last =                               \
-      !(draw->rasterizer->flatshade &&                            \
-        draw->rasterizer->flatshade_first);                       \
    const unsigned prim_flags = input_prims->flags;                \
+   const unsigned count = input_prims->count;                     \
+   const boolean last_vertex_last = TRUE;                         \
    do {                                                           \
       debug_assert(input_prims->primitive_count == 1);            \
       switch (prim) {                                             \
diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
index 1446e81bba..7fafde9d5e 100644
--- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
@@ -7,12 +7,9 @@
 
 #define FUNC_ENTER                                                \
    /* declare more local vars */                                  \
-   struct draw_context *draw = so->draw;                          \
    const unsigned prim = input_prims->prim;                       \
-   const boolean last_vertex_last =                               \
-      !(draw->rasterizer->flatshade &&                            \
-        draw->rasterizer->flatshade_first);                       \
    const unsigned prim_flags = input_prims->flags;                \
+   const boolean last_vertex_last = TRUE;                         \
    do {                                                           \
       debug_assert(input_prims->primitive_count == 1);            \
       switch (prim) {                                             \
-- 
cgit v1.2.3


From a072f0e186522f9de2848989422ad0244f65c961 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sat, 14 Aug 2010 00:05:28 +0800
Subject: draw: Add PRIMITIVE macro to vsplit.

PRIMITIVE is used by the indexed path to flush the entire primitive with
custom vertex count checks.  It replaces the existing fast path.
---
 src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h | 46 ++++++++++++++-----------
 src/gallium/auxiliary/draw/draw_split_tmp.h     |  5 +++
 2 files changed, 31 insertions(+), 20 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
index efeaa56711..4bb57b1493 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -34,9 +34,8 @@
  * (rebased) index buffer as the draw elements.
  */
 static boolean
-CONCAT(vsplit_segment_fast_, ELT_TYPE)(struct vsplit_frontend *vsplit,
-                                       unsigned flags,
-                                       unsigned istart, unsigned icount)
+CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                    unsigned istart, unsigned icount)
 {
    struct draw_context *draw = vsplit->draw;
    const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts;
@@ -44,10 +43,25 @@ CONCAT(vsplit_segment_fast_, ELT_TYPE)(struct vsplit_frontend *vsplit,
    const unsigned max_index = draw->pt.user.max_index;
    const int elt_bias = draw->pt.user.eltBias;
    unsigned fetch_start, fetch_count;
-   const ushort *draw_elts;
+   const ushort *draw_elts = NULL;
    unsigned i;
 
-   assert(icount <= vsplit->segment_size);
+   /* use the ib directly */
+   if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
+      if (icount > vsplit->max_vertices)
+         return FALSE;
+
+      for (i = 0; i < icount; i++) {
+         ELT_TYPE idx = ib[istart + i];
+         assert(idx >= min_index && idx <= max_index);
+      }
+      draw_elts = (const ushort *) ib;
+   }
+   else {
+      /* have to go through vsplit->draw_elts */
+      if (icount > vsplit->segment_size)
+         return FALSE;
+   }
 
    /* this is faster only when we fetch less elements than the normal path */
    if (max_index - min_index > icount - 1)
@@ -65,14 +79,7 @@ CONCAT(vsplit_segment_fast_, ELT_TYPE)(struct vsplit_frontend *vsplit,
    fetch_start = min_index + elt_bias;
    fetch_count = max_index - min_index + 1;
 
-   if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
-      for (i = 0; i < icount; i++) {
-         ELT_TYPE idx = ib[istart + i];
-         assert(idx >= min_index && idx <= max_index);
-      }
-      draw_elts = (const ushort *) ib;
-   }
-   else {
+   if (!draw_elts) {
       if (min_index == 0) {
          for (i = 0; i < icount; i++) {
             ELT_TYPE idx = ib[istart + i];
@@ -95,7 +102,7 @@ CONCAT(vsplit_segment_fast_, ELT_TYPE)(struct vsplit_frontend *vsplit,
 
    return vsplit->middle->run_linear_elts(vsplit->middle,
                                           fetch_start, fetch_count,
-                                          draw_elts, icount, flags);
+                                          draw_elts, icount, 0x0);
 }
 
 /**
@@ -170,12 +177,6 @@ CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit,
                                          unsigned istart,
                                          unsigned icount)
 {
-   /* the primitive is not splitted */
-   if (!(flags)) {
-      if (CONCAT(vsplit_segment_fast_, ELT_TYPE)(vsplit,
-               flags, istart, icount))
-         return;
-   }
    CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
          flags, istart, icount, FALSE, 0, FALSE, 0);
 }
@@ -213,6 +214,9 @@ CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit,
    const unsigned max_count_loop = vsplit->segment_size - 1;               \
    const unsigned max_count_fan = vsplit->segment_size;
 
+#define PRIMITIVE(istart, icount)   \
+   CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount)
+
 #else /* ELT_TYPE */
 
 static void
@@ -274,6 +278,8 @@ vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
    const unsigned max_count_loop = vsplit->segment_size - 1;               \
    const unsigned max_count_fan = vsplit->segment_size;
 
+#define PRIMITIVE(istart, icount) FALSE
+
 #define ELT_TYPE linear
 
 #endif /* ELT_TYPE */
diff --git a/src/gallium/auxiliary/draw/draw_split_tmp.h b/src/gallium/auxiliary/draw/draw_split_tmp.h
index 40ab0b71f1..47defc62b9 100644
--- a/src/gallium/auxiliary/draw/draw_split_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_split_tmp.h
@@ -47,6 +47,10 @@ FUNC(FUNC_VARS)
    if (count < first)
       return;
 
+   /* try flushing the entire primitive */
+   if (PRIMITIVE(start, count))
+      return;
+
    /* must be able to at least flush two complete primitives */
    assert(max_count_simple >= first + incr &&
           max_count_loop >= first + incr &&
@@ -166,6 +170,7 @@ FUNC(FUNC_VARS)
 #undef FUNC_VARS
 #undef LOCAL_VARS
 
+#undef PRIMITIVE
 #undef SEGMENT_SIMPLE
 #undef SEGMENT_LOOP
 #undef SEGMENT_FAN
-- 
cgit v1.2.3


From c3fee80f2b35f6a7e48d6015bfc759c66b7e1a2c Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sat, 7 Aug 2010 21:02:13 +0800
Subject: draw: Remove DRAW_PIPE_MAX_VERTICES and DRAW_PIPE_FLAG_MASK.

The higher bits of draw elements are no longer used for the stipple or
edge flags.
---
 src/gallium/auxiliary/draw/draw_gs.c               |  2 +-
 src/gallium/auxiliary/draw/draw_pipe.c             | 11 +++--------
 src/gallium/auxiliary/draw/draw_private.h          | 22 ++++++++--------------
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |  6 +++---
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c       |  6 +++---
 src/gallium/auxiliary/draw/draw_pt_so_emit.c       |  2 +-
 6 files changed, 19 insertions(+), 30 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 592f71bfbe..50a03ac95a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -380,7 +380,7 @@ static void gs_tri_adj(struct draw_geometry_shader *shader,
 
 #define FUNC         gs_run_elts
 #define LOCAL_VARS   const ushort *elts = input_prims->elts;
-#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK)
+#define GET_ELT(idx) (elts[idx])
 #include "draw_gs_tmp.h"
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 43c25167a9..b75262a357 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -173,27 +173,23 @@ static void do_triangle( struct draw_context *draw,
 
 #define TRIANGLE(flags,i0,i1,i2)                                  \
    do {                                                           \
-      assert(!((i1) & DRAW_PIPE_FLAG_MASK));                      \
-      assert(!((i2) & DRAW_PIPE_FLAG_MASK));                      \
       do_triangle( draw,                                          \
                    flags,                                         \
-                   verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK),  \
+                   verts + stride * (i0),                         \
                    verts + stride * (i1),                         \
                    verts + stride * (i2) );                       \
    } while (0)
 
 #define LINE(flags,i0,i1)                                         \
    do {                                                           \
-      assert(!((i1) & DRAW_PIPE_FLAG_MASK));                      \
       do_line( draw,                                              \
                flags,                                             \
-               verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK),      \
+               verts + stride * (i0),                             \
                verts + stride * (i1) );                           \
    } while (0)
 
 #define POINT(i0)                               \
    do {                                         \
-      assert(!((i0) & DRAW_PIPE_FLAG_MASK));    \
       do_point( draw, verts + stride * (i0) );  \
    } while (0)
 
@@ -247,8 +243,7 @@ void draw_pipeline_run( struct draw_context *draw,
          unsigned max_index = 0x0, i;
          /* find the largest element index */
          for (i = 0; i < count; i++) {
-            unsigned int index = (prim_info->elts[start + i]
-                                  & ~DRAW_PIPE_FLAG_MASK);
+            unsigned int index = prim_info->elts[start + i];
             if (index > max_index)
                max_index = index;
          }
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 94b688f891..854c45f060 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -373,21 +373,15 @@ void draw_pipeline_destroy( struct draw_context *draw );
 
 
 
-/* We use the top few bits in the elts[] parameter to convey a little
- * API information.  This limits the number of vertices we can address
- * to only 4096 -- if that becomes a problem, we can switch to 32-bit
- * draw indices.
- *
- * These flags expected at first vertex of lines & triangles when
- * unfilled and/or line stipple modes are operational.
+/*
+ * These flags are used by the pipeline when unfilled and/or line stipple modes
+ * are operational.
  */
-#define DRAW_PIPE_MAX_VERTICES  (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_0   (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_1   (0x2<<12)
-#define DRAW_PIPE_EDGE_FLAG_2   (0x4<<12)
-#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12)
-#define DRAW_PIPE_RESET_STIPPLE (0x8<<12)
-#define DRAW_PIPE_FLAG_MASK     (0xf<<12)
+#define DRAW_PIPE_EDGE_FLAG_0   0x1
+#define DRAW_PIPE_EDGE_FLAG_1   0x2
+#define DRAW_PIPE_EDGE_FLAG_2   0x4
+#define DRAW_PIPE_EDGE_FLAG_ALL 0x7
+#define DRAW_PIPE_RESET_STIPPLE 0x8
 
 void draw_pipeline_run( struct draw_context *draw,
                         const struct draw_vertex_info *vert,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 1ac20d27f3..4d2d24d2df 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -112,11 +112,11 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
 			    gs_out_prim,
                             max_vertices );
 
-      *max_vertices = MAX2( *max_vertices,
-                            DRAW_PIPE_MAX_VERTICES );
+      *max_vertices = MAX2( *max_vertices, 4096 );
    }
    else {
-      *max_vertices = DRAW_PIPE_MAX_VERTICES; 
+      /* limit max fetches by limiting max_vertices */
+      *max_vertices = 4096;
    }
 
    /* return even number */
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 8f2847ffa0..572aa67e60 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -118,11 +118,11 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
 			    out_prim,
                             max_vertices );
 
-      *max_vertices = MAX2( *max_vertices,
-                            DRAW_PIPE_MAX_VERTICES );
+      *max_vertices = MAX2( *max_vertices, 4096 );
    }
    else {
-      *max_vertices = DRAW_PIPE_MAX_VERTICES;
+      /* limit max fetches by limiting max_vertices */
+      *max_vertices = 4096;
    }
 
    /* return even number */
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index f7f4f24d35..c86bdd99a3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -225,7 +225,7 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
 
 #define FUNC         so_run_elts
 #define LOCAL_VARS   const ushort *elts = input_prims->elts;
-#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK)
+#define GET_ELT(idx) (elts[start + (idx)])
 #include "draw_so_emit_tmp.h"
 
 
-- 
cgit v1.2.3


From aaf51ed7c24a5d9488f8225972e5d5d108c6c197 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Tue, 10 Aug 2010 01:05:25 +0800
Subject: draw: No need to make max_vertices even.

Triangle strip alternates the front/back orientation of its triangles.
max_vertices was made even so that varray never split a triangle
strip at the wrong position.

It did not work for triangle strips with adjacency, and it is no
longer relevant with vsplit.
---
 src/gallium/auxiliary/draw/draw_pipe_vbuf.c                    | 3 ---
 src/gallium/auxiliary/draw/draw_pt_emit.c                      | 3 ---
 src/gallium/auxiliary/draw/draw_pt_fetch_emit.c                | 9 ---------
 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c          | 9 ---------
 src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c      | 3 ---
 src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 3 ---
 6 files changed, 30 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 3c93c9014a..58c5858734 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -353,9 +353,6 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
    /* Allocate a new vertex buffer */
    vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
 
-   /* even number */
-   vbuf->max_vertices = vbuf->max_vertices & ~1;
-
    if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
       vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 5568fbb9f8..89d96c4235 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -120,9 +120,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
 
    *max_vertices = (draw->render->max_vertex_buffer_bytes / 
                     (vinfo->size * 4));
-
-   /* even number */
-   *max_vertices = *max_vertices & ~1;
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index d826e79dbf..80a89428b6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -191,15 +191,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
 
    *max_vertices = (draw->render->max_vertex_buffer_bytes / 
                     (vinfo->size * 4));
-
-   /* Return an even number of verts.
-    * This prevents "parity" errors when splitting long triangle strips which
-    * can lead to front/back culling mix-ups.
-    * Every other triangle in a strip has an alternate front/back orientation
-    * so splitting at an odd position can cause the orientation of subsequent
-    * triangles to get reversed.
-    */
-   *max_vertices = *max_vertices & ~1;
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index c64104dda5..a31d3feb16 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -175,15 +175,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
    *max_vertices = (draw->render->max_vertex_buffer_bytes / 
                     (vinfo->size * 4));
 
-   /* Return an even number of verts.
-    * This prevents "parity" errors when splitting long triangle strips which
-    * can lead to front/back culling mix-ups.
-    * Every other triangle in a strip has an alternate front/back orientation
-    * so splitting at an odd position can cause the orientation of subsequent
-    * triangles to get reversed.
-    */
-   *max_vertices = *max_vertices & ~1;
-
    /* Probably need to do this somewhere (or fix exec shader not to
     * need it):
     */
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 4d2d24d2df..96b40fb363 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -119,9 +119,6 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
       *max_vertices = 4096;
    }
 
-   /* return even number */
-   *max_vertices = *max_vertices & ~1;
-
    /* No need to prepare the shader.
     */
    vs->prepare(vs, draw);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 572aa67e60..78b1bf988c 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -125,9 +125,6 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
       *max_vertices = 4096;
    }
 
-   /* return even number */
-   *max_vertices = *max_vertices & ~1;
-
    draw_llvm_make_variant_key(fpme->llvm, &key);
 
    li = first_elem(&shader->variants);
-- 
cgit v1.2.3


From 9271059b361128070c68b3d1a7982b4f9f151546 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Mon, 16 Aug 2010 22:00:45 +0800
Subject: draw: Fix polygon edge flags.

Fix a copy-and-paste error introduced by
f141abdc8fdbff41e16b0ce53fa3fa8fba32a7f9.
---
 src/gallium/auxiliary/draw/draw_decompose_tmp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
index be3a997c3d..a142563af9 100644
--- a/src/gallium/auxiliary/draw/draw_decompose_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
@@ -257,7 +257,7 @@ FUNC(FUNC_VARS)
             flags = (DRAW_PIPE_RESET_STIPPLE |
                      DRAW_PIPE_EDGE_FLAG_0);
             if (!(prim_flags & DRAW_SPLIT_BEFORE))
-               flags |= DRAW_PIPE_EDGE_FLAG_1;
+               flags |= DRAW_PIPE_EDGE_FLAG_2;
 
             edge_next = DRAW_PIPE_EDGE_FLAG_0;
             edge_finish =
-- 
cgit v1.2.3


From ddcf028aa0a1bd6f79381164c8b1c3b816792e47 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Tue, 10 Aug 2010 09:51:20 +0200
Subject: translate_generic: use memcpy if possible (v3)

Changes in v3:
- If we can do a copy, don't try to get an emit func, as that can assert(0)

Changes in v2:
- Add comment regarding copy_size

When used in GPU drivers, translate can be used to simultaneously
perform a gather operation, and convert away from unsupported formats.

In this use case, input and output formats will often be identical: clearly
it would make sense to use a memcpy in this case.

Instead, translate insists on converting to and from 32-bit floating point
numbers.

This is not only extremely expensive, but it also loses precision for
32/64-bit integers and 64-bit floating point numbers.

This patch changes translate_generic to just use memcpy if the formats are
identical, non-blocked, and with an integral number of bytes per pixel (note
that all sensible vertex formats are like this).
---
 .../auxiliary/translate/translate_generic.c        | 108 ++++++++++++++-------
 1 file changed, 75 insertions(+), 33 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 42cfd763e9..9d2653920d 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -64,6 +64,14 @@ struct translate_generic {
       unsigned input_stride;
       unsigned max_index;
 
+      /* this value is set to -1 if this is a normal element with output_format != input_format:
+       * in this case, u_format is used to do a full conversion
+       *
+       * this value is set to the format size in bytes if output_format == input_format or for 32-bit instance ids:
+       * in this case, memcpy is used to copy this amount of bytes
+       */
+      int copy_size;
+
    } attrib[PIPE_MAX_ATTRIBS];
 
    unsigned nr_attrib;
@@ -354,8 +362,6 @@ static emit_func get_emit_func( enum pipe_format format )
    }
 }
 
-
-
 /**
  * Fetch vertex attributes for 'count' vertices.
  */
@@ -380,9 +386,10 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
 	 float data[4];
 	 char *dst = vert + tg->attrib[attr].output_offset;
 
-         if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+	 if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
             const uint8_t *src;
             unsigned index;
+            int copy_size;
 
             if (tg->attrib[attr].instance_divisor) {
                index = instance_id / tg->attrib[attr].instance_divisor;
@@ -396,27 +403,34 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
             src = tg->attrib[attr].input_ptr +
                   tg->attrib[attr].input_stride * index;
 
-            tg->attrib[attr].fetch( data, src, 0, 0 );
-
-            if (0)
-               debug_printf("Fetch elt attr %d  from %p  stride %d  div %u  max %u  index %d:  "
-                            " %f, %f, %f, %f \n",
-                            attr,
-                            tg->attrib[attr].input_ptr,
-                            tg->attrib[attr].input_stride,
-                            tg->attrib[attr].instance_divisor,
-                            tg->attrib[attr].max_index,
-                            index,
-                            data[0], data[1],data[2], data[3]);
+            copy_size = tg->attrib[attr].copy_size;
+            if(likely(copy_size >= 0))
+               memcpy(dst, src, copy_size);
+            else
+            {
+               tg->attrib[attr].fetch( data, src, 0, 0 );
+
+               if (0)
+                  debug_printf("Fetch elt attr %d  from %p  stride %d  div %u  max %u  index %d:  "
+                               " %f, %f, %f, %f \n",
+                               attr,
+                               tg->attrib[attr].input_ptr,
+                               tg->attrib[attr].input_stride,
+                               tg->attrib[attr].instance_divisor,
+                               tg->attrib[attr].max_index,
+                               index,
+                               data[0], data[1],data[2], data[3]);
+               tg->attrib[attr].emit( data, dst );
+            }
          } else {
-            data[0] = (float)instance_id;
+            if(likely(tg->attrib[attr].copy_size >= 0))
+               memcpy(data, &instance_id, 4);
+            else
+            {
+               data[0] = (float)instance_id;
+               tg->attrib[attr].emit( data, dst );
+            }
          }
-
-         if (0)
-            debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
-                         i, elt, attr, data[0], data[1], data[2], data[3]);
-
-	 tg->attrib[attr].emit( data, dst );
       }
       vert += tg->translate.key.output_stride;
    }
@@ -448,6 +462,7 @@ static void PIPE_CDECL generic_run( struct translate *translate,
          if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
             const uint8_t *src;
             unsigned index;
+            int copy_size;
 
             if (tg->attrib[attr].instance_divisor) {
                index = instance_id / tg->attrib[attr].instance_divisor;
@@ -462,25 +477,33 @@ static void PIPE_CDECL generic_run( struct translate *translate,
             src = tg->attrib[attr].input_ptr +
                   tg->attrib[attr].input_stride * index;
 
-            tg->attrib[attr].fetch( data, src, 0, 0 );
+            copy_size = tg->attrib[attr].copy_size;
+            if(likely(copy_size >= 0))
+               memcpy(dst, src, copy_size);
+            else
+            {
+               tg->attrib[attr].fetch( data, src, 0, 0 );
 
-            if (0)
-               debug_printf("Fetch linear attr %d  from %p  stride %d  index %d: "
+               if (0)
+                  debug_printf("Fetch linear attr %d  from %p  stride %d  index %d: "
                             " %f, %f, %f, %f \n",
                             attr,
                             tg->attrib[attr].input_ptr,
                             tg->attrib[attr].input_stride,
                             index,
                             data[0], data[1],data[2], data[3]);
+
+               tg->attrib[attr].emit( data, dst );
+            }
          } else {
-            data[0] = (float)instance_id;
+            if(likely(tg->attrib[attr].copy_size >= 0))
+               memcpy(data, &instance_id, 4);
+            else
+            {
+               data[0] = (float)instance_id;
+               tg->attrib[attr].emit( data, dst );
+            }
          }
-
-         if (0)
-            debug_printf("vert %d attr %d: %f %f %f %f\n",
-                         i, attr, data[0], data[1], data[2], data[3]);
-
-	 tg->attrib[attr].emit( data, dst );
       }
       
       vert += tg->translate.key.output_stride;
@@ -544,9 +567,28 @@ struct translate *translate_generic_create( const struct translate_key *key )
       tg->attrib[i].input_offset = key->element[i].input_offset;
       tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
 
-      tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
       tg->attrib[i].output_offset = key->element[i].output_offset;
 
+      tg->attrib[i].copy_size = -1;
+      if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
+      {
+            if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
+                  || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
+               tg->attrib[i].copy_size = 4;
+      }
+      else
+      {
+         if(key->element[i].input_format == key->element[i].output_format
+               && format_desc->block.width == 1
+               && format_desc->block.height == 1
+               && !(format_desc->block.bits & 7))
+            tg->attrib[i].copy_size = format_desc->block.bits >> 3;
+      }
+
+      if(tg->attrib[i].copy_size < 0)
+	      tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+      else
+	      tg->attrib[i].emit  = NULL;
    }
 
    tg->nr_attrib = key->nr_elements;
-- 
cgit v1.2.3


From 1cb92fb92e69b5b138293398a98665c2a3c63a5b Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Tue, 10 Aug 2010 10:27:14 +0200
Subject: translate_generic: factor out common code between linear and indexed

This moves the common code into a separate ALWAYS_INLINE function.
---
 .../auxiliary/translate/translate_generic.c        | 177 ++++++++-------------
 1 file changed, 62 insertions(+), 115 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 9d2653920d..828b76dc77 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -362,6 +362,66 @@ static emit_func get_emit_func( enum pipe_format format )
    }
 }
 
+static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
+                                         unsigned elt,
+                                         unsigned instance_id,
+                                         void *vert )
+{
+   unsigned nr_attrs = tg->nr_attrib;
+   unsigned attr;
+
+   for (attr = 0; attr < nr_attrs; attr++) {
+      float data[4];
+      char *dst = vert + tg->attrib[attr].output_offset;
+
+      if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+         const uint8_t *src;
+         unsigned index;
+         int copy_size;
+
+         if (tg->attrib[attr].instance_divisor) {
+            index = instance_id / tg->attrib[attr].instance_divisor;
+         }
+         else {
+            index = elt;
+         }
+
+         /* clamp to void going out of bounds */
+         index = MIN2(index, tg->attrib[attr].max_index);
+
+         src = tg->attrib[attr].input_ptr +
+               tg->attrib[attr].input_stride * index;
+
+         copy_size = tg->attrib[attr].copy_size;
+         if(likely(copy_size >= 0))
+            memcpy(dst, src, copy_size);
+         else
+         {
+            tg->attrib[attr].fetch( data, src, 0, 0 );
+
+            if (0)
+               debug_printf("Fetch linear attr %d  from %p  stride %d  index %d: "
+                         " %f, %f, %f, %f \n",
+                         attr,
+                         tg->attrib[attr].input_ptr,
+                         tg->attrib[attr].input_stride,
+                         index,
+                         data[0], data[1],data[2], data[3]);
+
+            tg->attrib[attr].emit( data, dst );
+         }
+      } else {
+         if(likely(tg->attrib[attr].copy_size >= 0))
+            memcpy(data, &instance_id, 4);
+         else
+         {
+            data[0] = (float)instance_id;
+            tg->attrib[attr].emit( data, dst );
+         }
+      }
+   }
+}
+
 /**
  * Fetch vertex attributes for 'count' vertices.
  */
@@ -373,71 +433,14 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
 {
    struct translate_generic *tg = translate_generic(translate);
    char *vert = output_buffer;
-   unsigned nr_attrs = tg->nr_attrib;
-   unsigned attr;
    unsigned i;
 
-   /* loop over vertex attributes (vertex shader inputs)
-    */
    for (i = 0; i < count; i++) {
-      const unsigned elt = *elts++;
-
-      for (attr = 0; attr < nr_attrs; attr++) {
-	 float data[4];
-	 char *dst = vert + tg->attrib[attr].output_offset;
-
-	 if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
-            const uint8_t *src;
-            unsigned index;
-            int copy_size;
-
-            if (tg->attrib[attr].instance_divisor) {
-               index = instance_id / tg->attrib[attr].instance_divisor;
-            } else {
-               index = elt;
-            }
-
-            /* clamp to void going out of bounds */
-            index = MIN2(index, tg->attrib[attr].max_index);
-
-            src = tg->attrib[attr].input_ptr +
-                  tg->attrib[attr].input_stride * index;
-
-            copy_size = tg->attrib[attr].copy_size;
-            if(likely(copy_size >= 0))
-               memcpy(dst, src, copy_size);
-            else
-            {
-               tg->attrib[attr].fetch( data, src, 0, 0 );
-
-               if (0)
-                  debug_printf("Fetch elt attr %d  from %p  stride %d  div %u  max %u  index %d:  "
-                               " %f, %f, %f, %f \n",
-                               attr,
-                               tg->attrib[attr].input_ptr,
-                               tg->attrib[attr].input_stride,
-                               tg->attrib[attr].instance_divisor,
-                               tg->attrib[attr].max_index,
-                               index,
-                               data[0], data[1],data[2], data[3]);
-               tg->attrib[attr].emit( data, dst );
-            }
-         } else {
-            if(likely(tg->attrib[attr].copy_size >= 0))
-               memcpy(data, &instance_id, 4);
-            else
-            {
-               data[0] = (float)instance_id;
-               tg->attrib[attr].emit( data, dst );
-            }
-         }
-      }
+      generic_run_one(tg, *elts++, instance_id, vert);
       vert += tg->translate.key.output_stride;
    }
 }
 
-
-
 static void PIPE_CDECL generic_run( struct translate *translate,
                                     unsigned start,
                                     unsigned count,
@@ -446,66 +449,10 @@ static void PIPE_CDECL generic_run( struct translate *translate,
 {
    struct translate_generic *tg = translate_generic(translate);
    char *vert = output_buffer;
-   unsigned nr_attrs = tg->nr_attrib;
-   unsigned attr;
    unsigned i;
 
-   /* loop over vertex attributes (vertex shader inputs)
-    */
    for (i = 0; i < count; i++) {
-      unsigned elt = start + i;
-
-      for (attr = 0; attr < nr_attrs; attr++) {
-	 float data[4];
-	 char *dst = vert + tg->attrib[attr].output_offset;
-
-         if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
-            const uint8_t *src;
-            unsigned index;
-            int copy_size;
-
-            if (tg->attrib[attr].instance_divisor) {
-               index = instance_id / tg->attrib[attr].instance_divisor;
-            }
-            else {
-               index = elt;
-            }
-
-            /* clamp to void going out of bounds */
-            index = MIN2(index, tg->attrib[attr].max_index);
-
-            src = tg->attrib[attr].input_ptr +
-                  tg->attrib[attr].input_stride * index;
-
-            copy_size = tg->attrib[attr].copy_size;
-            if(likely(copy_size >= 0))
-               memcpy(dst, src, copy_size);
-            else
-            {
-               tg->attrib[attr].fetch( data, src, 0, 0 );
-
-               if (0)
-                  debug_printf("Fetch linear attr %d  from %p  stride %d  index %d: "
-                            " %f, %f, %f, %f \n",
-                            attr,
-                            tg->attrib[attr].input_ptr,
-                            tg->attrib[attr].input_stride,
-                            index,
-                            data[0], data[1],data[2], data[3]);
-
-               tg->attrib[attr].emit( data, dst );
-            }
-         } else {
-            if(likely(tg->attrib[attr].copy_size >= 0))
-               memcpy(data, &instance_id, 4);
-            else
-            {
-               data[0] = (float)instance_id;
-               tg->attrib[attr].emit( data, dst );
-            }
-         }
-      }
-      
+      generic_run_one(tg, start + i, instance_id, vert);
       vert += tg->translate.key.output_stride;
    }
 }
-- 
cgit v1.2.3


From 68e74f1b0110348a44f589739c6edf3fe8e2b368 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Tue, 10 Aug 2010 10:31:48 +0200
Subject: translate_sse: remove useless generated function wrappers

Currently translate_sse puts two trivial wrappers in the translate vtable.

These slow it down and enlarge the source code for no gain, except perhaps
the ability to set a breakpoint there, so remove them.

Breakpoints can be set on the caller of the translate functions, with no
loss of functionality.
---
 src/gallium/auxiliary/translate/translate_sse.c | 55 ++-----------------------
 1 file changed, 4 insertions(+), 51 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index ef3aa674a3..68c71f4251 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -46,18 +46,6 @@
 #define W    3
 
 
-typedef void (PIPE_CDECL *run_func)( struct translate *translate,
-                                     unsigned start,
-                                     unsigned count,
-                                     unsigned instance_id,
-                                     void *output_buffer);
-
-typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
-                                          const unsigned *elts,
-                                          unsigned count,
-                                          unsigned instance_id,
-                                          void *output_buffer);
-
 struct translate_buffer {
    const void *base_ptr;
    unsigned stride;
@@ -102,9 +90,6 @@ struct translate_sse {
    boolean use_instancing;
    unsigned instance_id;
 
-   run_func      gen_run;
-   run_elts_func gen_run_elts;
-
    /* these are actually known values, but putting them in a struct
     * like this is helpful to keep them in sync across the file.
     */
@@ -700,36 +685,6 @@ static void translate_sse_release( struct translate *translate )
    FREE(p);
 }
 
-static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
-			      const unsigned *elts,
-			      unsigned count,
-                              unsigned instance_id,
-			      void *output_buffer )
-{
-   struct translate_sse *p = (struct translate_sse *)translate;
-
-   p->gen_run_elts( translate,
-		    elts,
-		    count,
-                    instance_id,
-                    output_buffer);
-}
-
-static void PIPE_CDECL translate_sse_run( struct translate *translate,
-			 unsigned start,
-			 unsigned count,
-                         unsigned instance_id,
-			 void *output_buffer )
-{
-   struct translate_sse *p = (struct translate_sse *)translate;
-
-   p->gen_run( translate,
-	       start,
-	       count,
-               instance_id,
-               output_buffer);
-}
-
 
 struct translate *translate_sse2_create( const struct translate_key *key )
 {
@@ -746,8 +701,6 @@ struct translate *translate_sse2_create( const struct translate_key *key )
    p->translate.key = *key;
    p->translate.release = translate_sse_release;
    p->translate.set_buffer = translate_sse_set_buffer;
-   p->translate.run_elts = translate_sse_run_elts;
-   p->translate.run = translate_sse_run;
 
    for (i = 0; i < key->nr_elements; i++) {
       if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
@@ -789,12 +742,12 @@ struct translate *translate_sse2_create( const struct translate_key *key )
    if (!build_vertex_emit(p, &p->elt_func, FALSE))
       goto fail;
 
-   p->gen_run = (run_func)x86_get_func(&p->linear_func);
-   if (p->gen_run == NULL)
+   p->translate.run = (void*)x86_get_func(&p->linear_func);
+   if (p->translate.run == NULL)
       goto fail;
 
-   p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func);
-   if (p->gen_run_elts == NULL)
+   p->translate.run_elts = (void*)x86_get_func(&p->elt_func);
+   if (p->translate.run_elts == NULL)
       goto fail;
 
    return &p->translate;
-- 
cgit v1.2.3


From 4a4e29a9ab96d44fca9bb25064e12715aac85cbd Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Tue, 10 Aug 2010 10:47:23 +0200
Subject: translate: add support for 8/16-bit indices

Currently, only 32-bit indices are supported, but in some use cases
translate needs to support all index types.
---
 src/gallium/auxiliary/rtasm/rtasm_x86sse.c         | 14 +++++
 src/gallium/auxiliary/rtasm/rtasm_x86sse.h         |  2 +
 src/gallium/auxiliary/translate/translate.h        | 12 ++++
 .../auxiliary/translate/translate_generic.c        | 34 +++++++++++
 src/gallium/auxiliary/translate/translate_sse.c    | 65 +++++++++++++++-------
 5 files changed, 108 insertions(+), 19 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 9f70b73698..63007c1feb 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -586,6 +586,20 @@ void x86_mov( struct x86_function *p,
    emit_op_modrm( p, 0x8b, 0x89, dst, src );
 }
 
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_2ub(p, 0x0f, 0xb6);
+   emit_modrm(p, dst, src);
+}
+
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_2ub(p, 0x0f, 0xb7);
+   emit_modrm(p, dst, src);
+}
+
 void x86_xor( struct x86_function *p,
 	      struct x86_reg dst,
 	      struct x86_reg src )
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 6208e8f707..365dec109e 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -237,6 +237,8 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
 void x86_inc( struct x86_function *p, struct x86_reg reg );
 void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void x86_mul( struct x86_function *p, struct x86_reg src );
 void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index eb6f2cc486..a75380228b 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -85,6 +85,18 @@ struct translate {
                                 unsigned instance_id,
                                 void *output_buffer);
 
+   void (PIPE_CDECL *run_elts16)( struct translate *,
+                                const uint16_t *elts,
+                                unsigned count,
+                                unsigned instance_id,
+                                void *output_buffer);
+
+   void (PIPE_CDECL *run_elts8)( struct translate *,
+                                const uint8_t *elts,
+                                unsigned count,
+                                unsigned instance_id,
+                                void *output_buffer);
+
    void (PIPE_CDECL *run)( struct translate *,
                            unsigned start,
                            unsigned count,
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 828b76dc77..975f23a6f4 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -441,6 +441,38 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
    }
 }
 
+static void PIPE_CDECL generic_run_elts16( struct translate *translate,
+                                         const uint16_t *elts,
+                                         unsigned count,
+                                         unsigned instance_id,
+                                         void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      generic_run_one(tg, *elts++, instance_id, vert);
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+static void PIPE_CDECL generic_run_elts8( struct translate *translate,
+                                         const uint8_t *elts,
+                                         unsigned count,
+                                         unsigned instance_id,
+                                         void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      generic_run_one(tg, *elts++, instance_id, vert);
+      vert += tg->translate.key.output_stride;
+   }
+}
+
 static void PIPE_CDECL generic_run( struct translate *translate,
                                     unsigned start,
                                     unsigned count,
@@ -498,6 +530,8 @@ struct translate *translate_generic_create( const struct translate_key *key )
    tg->translate.release = generic_release;
    tg->translate.set_buffer = generic_set_buffer;
    tg->translate.run_elts = generic_run_elts;
+   tg->translate.run_elts16 = generic_run_elts16;
+   tg->translate.run_elts8 = generic_run_elts8;
    tg->translate.run = generic_run;
 
    for (i = 0; i < key->nr_elements; i++) {
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 68c71f4251..f9aab9232c 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -67,6 +67,8 @@ struct translate_sse {
 
    struct x86_function linear_func;
    struct x86_function elt_func;
+   struct x86_function elt16_func;
+   struct x86_function elt8_func;
    struct x86_function *func;
 
    boolean loaded_identity;
@@ -362,7 +364,7 @@ static boolean translate_attr( struct translate_sse *p,
 
 
 static boolean init_inputs( struct translate_sse *p,
-                            boolean linear )
+                            unsigned index_size )
 {
    unsigned i;
    struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
@@ -372,7 +374,7 @@ static boolean init_inputs( struct translate_sse *p,
       struct translate_buffer_varient *varient = &p->buffer_varient[i];
       struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
 
-      if (linear || varient->instance_divisor) {
+      if (!index_size || varient->instance_divisor) {
          struct x86_reg buf_stride   = x86_make_disp(p->machine_EDX,
                                                      get_offset(p, &buffer->stride));
          struct x86_reg buf_ptr      = x86_make_disp(p->machine_EDX,
@@ -421,7 +423,7 @@ static boolean init_inputs( struct translate_sse *p,
          /* In the linear case, keep the buffer pointer instead of the
           * index number.
           */
-         if (linear && p->nr_buffer_varients == 1)
+         if (!index_size && p->nr_buffer_varients == 1)
             x86_mov(p->func, elt, tmp_EAX);
          else
             x86_mov(p->func, buf_ptr, tmp_EAX);
@@ -433,7 +435,7 @@ static boolean init_inputs( struct translate_sse *p,
 
 
 static struct x86_reg get_buffer_ptr( struct translate_sse *p,
-                                      boolean linear,
+                                      unsigned index_size,
                                       unsigned var_idx,
                                       struct x86_reg elt )
 {
@@ -441,10 +443,10 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
       return x86_make_disp(p->machine_EDX,
                            get_offset(p, &p->instance_id));
    }
-   if (linear && p->nr_buffer_varients == 1) {
+   if (!index_size && p->nr_buffer_varients == 1) {
       return p->idx_EBX;
    }
-   else if (linear || p->buffer_varient[var_idx].instance_divisor) {
+   else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
       struct x86_reg ptr = p->tmp_EAX;
       struct x86_reg buf_ptr = 
          x86_make_disp(p->machine_EDX, 
@@ -469,8 +471,19 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
 
       /* Calculate pointer to current attrib:
        */
-      x86_mov(p->func, ptr, buf_stride);
-      x86_imul(p->func, ptr, elt);
+      switch(index_size)
+      {
+      case 1:
+         x86_movzx8(p->func, ptr, elt);
+         break;
+      case 2:
+         x86_movzx16(p->func, ptr, elt);
+         break;
+      case 4:
+         x86_mov(p->func, ptr, elt);
+         break;
+      }
+      x86_imul(p->func, ptr, buf_stride);
       x86_add(p->func, ptr, buf_base_ptr);
       return ptr;
    }
@@ -479,9 +492,9 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
 
 
 static boolean incr_inputs( struct translate_sse *p, 
-                            boolean linear )
+                            unsigned index_size )
 {
-   if (linear && p->nr_buffer_varients == 1) {
+   if (!index_size && p->nr_buffer_varients == 1) {
       struct x86_reg stride = x86_make_disp(p->machine_EDX,
                                             get_offset(p, &p->buffer[0].stride));
 
@@ -490,7 +503,7 @@ static boolean incr_inputs( struct translate_sse *p,
          sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
       }
    }
-   else if (linear) {
+   else if (!index_size) {
       unsigned i;
 
       /* Is this worthwhile??
@@ -511,7 +524,7 @@ static boolean incr_inputs( struct translate_sse *p,
       }
    } 
    else {
-      x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
+      x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, index_size));
    }
    
    return TRUE;
@@ -536,7 +549,7 @@ static boolean incr_inputs( struct translate_sse *p,
  */
 static boolean build_vertex_emit( struct translate_sse *p,
 				  struct x86_function *func,
-				  boolean linear )
+				  unsigned index_size )
 {
    int fixup, label;
    unsigned j;
@@ -585,13 +598,13 @@ static boolean build_vertex_emit( struct translate_sse *p,
 
    /* always load, needed or not:
     */
-   init_inputs(p, linear);
+   init_inputs(p, index_size);
 
    /* Note address for loop jump
     */
    label = x86_get_label(p->func);
    {
-      struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
+      struct x86_reg elt = !index_size ? p->idx_EBX : x86_deref(p->idx_EBX);
       int last_varient = -1;
       struct x86_reg vb;
 
@@ -603,7 +616,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
           */
          if (varient != last_varient) {
             last_varient = varient;
-            vb = get_buffer_ptr(p, linear, varient, elt);
+            vb = get_buffer_ptr(p, index_size, varient, elt);
          }
          
          if (!translate_attr( p, a, 
@@ -621,7 +634,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
 
       /* Incr index
        */ 
-      incr_inputs( p, linear );
+      incr_inputs( p, index_size );
    }
 
    /* decr count, loop if not zero
@@ -736,10 +749,16 @@ struct translate *translate_sse2_create( const struct translate_key *key )
 
    if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
 
-   if (!build_vertex_emit(p, &p->linear_func, TRUE))
+   if (!build_vertex_emit(p, &p->linear_func, 0))
+      goto fail;
+
+   if (!build_vertex_emit(p, &p->elt_func, 4))
+      goto fail;
+
+   if (!build_vertex_emit(p, &p->elt16_func, 2))
       goto fail;
 
-   if (!build_vertex_emit(p, &p->elt_func, FALSE))
+   if (!build_vertex_emit(p, &p->elt8_func, 1))
       goto fail;
 
    p->translate.run = (void*)x86_get_func(&p->linear_func);
@@ -750,6 +769,14 @@ struct translate *translate_sse2_create( const struct translate_key *key )
    if (p->translate.run_elts == NULL)
       goto fail;
 
+   p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func);
+   if (p->translate.run_elts16 == NULL)
+      goto fail;
+
+   p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func);
+   if (p->translate.run_elts8 == NULL)
+      goto fail;
+
    return &p->translate;
 
  fail:
-- 
cgit v1.2.3


From a3e6e50544de74558ceb7cd4b618c350cdef36c6 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Thu, 12 Aug 2010 18:27:02 +0200
Subject: rtasm: add minimal x86-64 support and new instructions (v5)

Changes in v5:
- Add sse2_movdqa

Changes in v4:
- Use _WIN64 instead of WIN64

Changes in v3:
- Add x86_target() and x86_target_caps(), so that the target can in principle
  differ from the current CPU and callers don't need #ifs to check

Changes in v2:
- Win64 support (untested)
- Use u_cpu_detect.h constants instead of #ifs

This commit adds minimal x86-64 support: only movs between registers
are supported for r8-r15, and x64_rexw() must be used to ask for 64-bit
operations.

It also adds several new instructions for the new translate_sse code,
including movdqa.
---
 src/gallium/auxiliary/rtasm/rtasm_cpu.c    |   6 +-
 src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 484 +++++++++++++++++++++++++++--
 src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 101 +++++-
 3 files changed, 551 insertions(+), 40 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index 2e15751e50..0461c81550 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -30,7 +30,7 @@
 #include "rtasm_cpu.h"
 
 
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 static boolean rtasm_sse_enabled(void)
 {
    static boolean firsttime = 1;
@@ -49,7 +49,7 @@ static boolean rtasm_sse_enabled(void)
 int rtasm_cpu_has_sse(void)
 {
    /* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
    return rtasm_sse_enabled();
 #else
    return 0;
@@ -59,7 +59,7 @@ int rtasm_cpu_has_sse(void)
 int rtasm_cpu_has_sse2(void) 
 {
    /* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
    return rtasm_sse_enabled();
 #else
    return 0;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 63007c1feb..0fe6ebfcb4 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -22,8 +22,9 @@
  **************************************************************************/
 
 #include "pipe/p_config.h"
+#include "util/u_cpu_detect.h"
 
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 
 #include "pipe/p_compiler.h"
 #include "util/u_debug.h"
@@ -231,6 +232,10 @@ static void emit_modrm( struct x86_function *p,
    
    assert(reg.mod == mod_REG);
    
+   /* TODO: support extended x86-64 registers */
+   assert(reg.idx < 8);
+   assert(regmem.idx < 8);
+
    val |= regmem.mod << 6;     	/* mod field */
    val |= reg.idx << 3;		/* reg field */
    val |= regmem.idx;		/* r/m field */
@@ -363,6 +368,12 @@ int x86_get_label( struct x86_function *p )
  */
 
 
+void x64_rexw(struct x86_function *p)
+{
+   if(x86_target(p) != X86_32)
+      emit_1ub(p, 0x48);
+}
+
 void x86_jcc( struct x86_function *p,
 	      enum x86_cc cc,
 	      int label )
@@ -449,6 +460,52 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
    emit_1i(p, imm);
 }
 
+void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+   DUMP_RI( dst, imm );
+   if(dst.mod == mod_REG)
+      x86_mov_reg_imm(p, dst, imm);
+   else
+   {
+      emit_1ub(p, 0xc7);
+      emit_modrm_noreg(p, 0, dst);
+      emit_1i(p, imm);
+   }
+}
+
+void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
+{
+   DUMP_RI( dst, imm );
+   emit_1ub(p, 0x66);
+   if(dst.mod == mod_REG)
+   {
+      emit_1ub(p, 0xb8 + dst.idx);
+      emit_2ub(p, imm & 0xff, imm >> 8);
+   }
+   else
+   {
+      emit_1ub(p, 0xc7);
+      emit_modrm_noreg(p, 0, dst);
+      emit_2ub(p, imm & 0xff, imm >> 8);
+   }
+}
+
+void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
+{
+   DUMP_RI( dst, imm );
+   if(dst.mod == mod_REG)
+   {
+      emit_1ub(p, 0xb0 + dst.idx);
+      emit_1ub(p, imm);
+   }
+   else
+   {
+      emit_1ub(p, 0xc6);
+      emit_modrm_noreg(p, 0, dst);
+      emit_1ub(p, imm);
+   }
+}
+
 /**
  * Immediate group 1 instructions.
  */
@@ -520,7 +577,7 @@ void x86_push( struct x86_function *p,
    }
 
 
-   p->stack_offset += 4;
+   p->stack_offset += sizeof(void*);
 }
 
 void x86_push_imm32( struct x86_function *p,
@@ -530,7 +587,7 @@ void x86_push_imm32( struct x86_function *p,
    emit_1ub(p, 0x68);
    emit_1i(p,  imm32);
 
-   p->stack_offset += 4;
+   p->stack_offset += sizeof(void*);
 }
 
 
@@ -540,23 +597,33 @@ void x86_pop( struct x86_function *p,
    DUMP_R( reg );
    assert(reg.mod == mod_REG);
    emit_1ub(p, 0x58 + reg.idx);
-   p->stack_offset -= 4;
+   p->stack_offset -= sizeof(void*);
 }
 
 void x86_inc( struct x86_function *p,
 	      struct x86_reg reg )
 {
    DUMP_R( reg );
-   assert(reg.mod == mod_REG);
-   emit_1ub(p, 0x40 + reg.idx);
+   if(x86_target(p) == X86_32 && reg.mod == mod_REG)
+   {
+      emit_1ub(p, 0x40 + reg.idx);
+      return;
+   }
+   emit_1ub(p, 0xff);
+   emit_modrm_noreg(p, 0, reg);
 }
 
 void x86_dec( struct x86_function *p,
 	      struct x86_reg reg )
 {
    DUMP_R( reg );
-   assert(reg.mod == mod_REG);
-   emit_1ub(p, 0x48 + reg.idx);
+   if(x86_target(p) == X86_32 && reg.mod == mod_REG)
+   {
+      emit_1ub(p, 0x48 + reg.idx);
+      return;
+   }
+   emit_1ub(p, 0xff);
+   emit_modrm_noreg(p, 1, reg);
 }
 
 void x86_ret( struct x86_function *p )
@@ -583,6 +650,65 @@ void x86_mov( struct x86_function *p,
 	      struct x86_reg src )
 {
    DUMP_RR( dst, src );
+   /* special hack for reading arguments until we support x86-64 registers everywhere */
+   if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+   {
+      uint8_t rex = 0x40;
+      if(dst.idx >= 8)
+      {
+         rex |= 4;
+         dst.idx -= 8;
+      }
+      if(src.idx >= 8)
+      {
+         rex |= 1;
+         src.idx -= 8;
+      }
+      emit_1ub(p, rex);
+   }
+   emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_mov16( struct x86_function *p,
+	      struct x86_reg dst,
+	      struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_1ub(p, 0x66);
+   emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_mov8( struct x86_function *p,
+	      struct x86_reg dst,
+	      struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_op_modrm( p, 0x8a, 0x88, dst, src );
+}
+
+void x64_mov64( struct x86_function *p,
+	      struct x86_reg dst,
+	      struct x86_reg src )
+{
+   uint8_t rex = 0x48;
+   DUMP_RR( dst, src );
+   assert(x86_target(p) != X86_32);
+
+   /* special hack for reading arguments until we support x86-64 registers everywhere */
+   if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+   {
+      if(dst.idx >= 8)
+      {
+         rex |= 4;
+         dst.idx -= 8;
+      }
+      if(src.idx >= 8)
+      {
+         rex |= 1;
+         src.idx -= 8;
+      }
+   }
+   emit_1ub(p, rex);
    emit_op_modrm( p, 0x8b, 0x89, dst, src );
 }
 
@@ -694,6 +820,61 @@ void x86_div( struct x86_function *p,
    emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
 }
 
+void x86_bswap( struct x86_function *p, struct x86_reg reg )
+{
+   DUMP_R(reg);
+   assert(reg.file == file_REG32);
+   assert(reg.mod == mod_REG);
+   emit_2ub(p, 0x0f, 0xc8 + reg.idx);
+}
+
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+   DUMP_RI(reg, imm);
+   if(imm == 1)
+   {
+      emit_1ub(p, 0xd1);
+      emit_modrm_noreg(p, 5, reg);
+   }
+   else
+   {
+      emit_1ub(p, 0xc1);
+      emit_modrm_noreg(p, 5, reg);
+      emit_1ub(p, imm);
+   }
+}
+
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+   DUMP_RI(reg, imm);
+   if(imm == 1)
+   {
+      emit_1ub(p, 0xd1);
+      emit_modrm_noreg(p, 7, reg);
+   }
+   else
+   {
+      emit_1ub(p, 0xc1);
+      emit_modrm_noreg(p, 7, reg);
+      emit_1ub(p, imm);
+   }
+}
+
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm  )
+{
+   DUMP_RI(reg, imm);
+   if(imm == 1)
+   {
+      emit_1ub(p, 0xd1);
+      emit_modrm_noreg(p, 4, reg);
+   }
+   else
+   {
+      emit_1ub(p, 0xc1);
+      emit_modrm_noreg(p, 4, reg);
+      emit_1ub(p, imm);
+   }
+}
 
 
 /***********************************************************************
@@ -1027,6 +1208,77 @@ void sse_movmskps( struct x86_function *p,
  * SSE2 instructions
  */
 
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR(dst, src);
+   emit_2ub(p, 0x66, 0x0f);
+   if(dst.mod == mod_REG && dst.file == file_REG32)
+   {
+      emit_1ub(p, 0x7e);
+      emit_modrm(p, src, dst);
+   }
+   else
+   {
+      emit_op_modrm(p, 0x6e, 0x7e, dst, src);
+   }
+}
+
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR(dst, src);
+   switch (dst.mod) {
+   case mod_REG:
+      emit_3ub(p, 0xf3, 0x0f, 0x7e);
+      emit_modrm(p, dst, src);
+      break;
+   case mod_INDIRECT:
+   case mod_DISP32:
+   case mod_DISP8:
+      assert(src.mod == mod_REG);
+      emit_3ub(p, 0x66, 0x0f, 0xd6);
+      emit_modrm(p, src, dst);
+      break;
+   default:
+      assert(0);
+      break;
+   }
+}
+
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR(dst, src);
+   emit_2ub(p, 0xf3, 0x0f);
+   emit_op_modrm(p, 0x6f, 0x7f, dst, src);
+}
+
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR(dst, src);
+   emit_2ub(p, 0x66, 0x0f);
+   emit_op_modrm(p, 0x6f, 0x7f, dst, src);
+}
+
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR(dst, src);
+   emit_2ub(p, 0xf2, 0x0f);
+   emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR(dst, src);
+   emit_2ub(p, 0x66, 0x0f);
+   emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR(dst, src);
+   emit_2ub(p, 0x66, 0x0f);
+   emit_op_modrm(p, 0x28, 0x29, dst, src);
+}
+
 /**
  * Perform a reduced swizzle:
  */
@@ -1041,6 +1293,28 @@ void sse2_pshufd( struct x86_function *p,
    emit_1ub(p, shuf); 
 }
 
+void sse2_pshuflw( struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src,
+                  unsigned char shuf)
+{
+   DUMP_RRI( dst, src, shuf );
+   emit_3ub(p, 0xf2, X86_TWOB, 0x70);
+   emit_modrm(p, dst, src);
+   emit_1ub(p, shuf);
+}
+
+void sse2_pshufhw( struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src,
+                  unsigned char shuf)
+{
+   DUMP_RRI( dst, src, shuf );
+   emit_3ub(p, 0xf3, X86_TWOB, 0x70);
+   emit_modrm(p, dst, src);
+   emit_1ub(p, shuf);
+}
+
 void sse2_cvttps2dq( struct x86_function *p,
                      struct x86_reg dst,
                      struct x86_reg src )
@@ -1059,6 +1333,24 @@ void sse2_cvtps2dq( struct x86_function *p,
    emit_modrm( p, dst, src );
 }
 
+void sse2_cvtsd2ss( struct x86_function *p,
+                    struct x86_reg dst,
+                    struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_3ub(p, 0xf2, 0x0f, 0x5a);
+   emit_modrm( p, dst, src );
+}
+
+void sse2_cvtpd2ps( struct x86_function *p,
+                    struct x86_reg dst,
+                    struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_3ub(p, 0x66, 0x0f, 0x5a);
+   emit_modrm( p, dst, src );
+}
+
 void sse2_packssdw( struct x86_function *p,
 		    struct x86_reg dst,
 		    struct x86_reg src )
@@ -1095,6 +1387,97 @@ void sse2_punpcklbw( struct x86_function *p,
    emit_modrm( p, dst, src );
 }
 
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_3ub(p, 0x66, 0x0f, 0x61);
+   emit_modrm( p, dst, src );
+}
+
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_3ub(p, 0x66, 0x0f, 0x62);
+   emit_modrm( p, dst, src );
+}
+
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_3ub(p, 0x66, 0x0f, 0x6c);
+   emit_modrm( p, dst, src );
+}
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+   DUMP_RI(dst, imm);
+   emit_3ub(p, 0x66, 0x0f, 0x71);
+   emit_modrm_noreg(p, 6, dst);
+   emit_1ub(p, imm);
+}
+
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+   DUMP_RI(dst, imm);
+   emit_3ub(p, 0x66, 0x0f, 0x72);
+   emit_modrm_noreg(p, 6, dst);
+   emit_1ub(p, imm);
+}
+
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+   DUMP_RI(dst, imm);
+   emit_3ub(p, 0x66, 0x0f, 0x73);
+   emit_modrm_noreg(p, 6, dst);
+   emit_1ub(p, imm);
+}
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+   DUMP_RI(dst, imm);
+   emit_3ub(p, 0x66, 0x0f, 0x71);
+   emit_modrm_noreg(p, 2, dst);
+   emit_1ub(p, imm);
+}
+
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+   DUMP_RI(dst, imm);
+   emit_3ub(p, 0x66, 0x0f, 0x72);
+   emit_modrm_noreg(p, 2, dst);
+   emit_1ub(p, imm);
+}
+
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+   DUMP_RI(dst, imm);
+   emit_3ub(p, 0x66, 0x0f, 0x73);
+   emit_modrm_noreg(p, 2, dst);
+   emit_1ub(p, imm);
+}
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+   DUMP_RI(dst, imm);
+   emit_3ub(p, 0x66, 0x0f, 0x71);
+   emit_modrm_noreg(p, 4, dst);
+   emit_1ub(p, imm);
+}
+
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
+{
+   DUMP_RI(dst, imm);
+   emit_3ub(p, 0x66, 0x0f, 0x72);
+   emit_modrm_noreg(p, 4, dst);
+   emit_1ub(p, imm);
+}
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR(dst, src);
+   emit_3ub(p, 0x66, 0x0f, 0xeb);
+   emit_modrm(p, dst, src);
+}
 
 void sse2_rcpps( struct x86_function *p,
                  struct x86_reg dst,
@@ -1114,18 +1497,6 @@ void sse2_rcpss( struct x86_function *p,
    emit_modrm( p, dst, src );
 }
 
-void sse2_movd( struct x86_function *p,
-		struct x86_reg dst,
-		struct x86_reg src )
-{
-   DUMP_RR( dst, src );
-   emit_2ub(p, 0x66, X86_TWOB);
-   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
-}
-
-
-
-
 /***********************************************************************
  * x87 instructions
  */
@@ -1716,23 +2087,79 @@ void x86_cdecl_caller_pop_regs( struct x86_function *p )
 }
 
 
-/* Retreive a reference to one of the function arguments, taking into
- * account any push/pop activity:
- */
 struct x86_reg x86_fn_arg( struct x86_function *p,
-			   unsigned arg )
+                           unsigned arg )
 {
-   return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 
+   switch(x86_target(p))
+   {
+   case X86_64_WIN64_ABI:
+      /* Microsoft uses a different calling convention than the rest of the world */
+      switch(arg)
+      {
+      case 1:
+         return x86_make_reg(file_REG32, reg_CX);
+      case 2:
+         return x86_make_reg(file_REG32, reg_DX);
+      case 3:
+         return x86_make_reg(file_REG32, reg_R8);
+      case 4:
+         return x86_make_reg(file_REG32, reg_R9);
+      default:
+         return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+               p->stack_offset + (arg - 4) * 8);     /* ??? */
+      }
+   case X86_64_STD_ABI:
+      switch(arg)
+      {
+      case 1:
+         return x86_make_reg(file_REG32, reg_DI);
+      case 2:
+         return x86_make_reg(file_REG32, reg_SI);
+      case 3:
+         return x86_make_reg(file_REG32, reg_DX);
+      case 4:
+         return x86_make_reg(file_REG32, reg_CX);
+      case 5:
+         return x86_make_reg(file_REG32, reg_R8);
+      case 6:
+         return x86_make_reg(file_REG32, reg_R9);
+      default:
+         return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+               p->stack_offset + (arg - 6) * 8);     /* ??? */
+      }
+   case X86_32:
+      return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
 			p->stack_offset + arg * 4);	/* ??? */
+   default:
+      abort();
+   }
 }
 
+static void x86_init_func_common( struct x86_function *p )
+{
+   util_cpu_detect();
+   p->caps = 0;
+   if(util_cpu_caps.has_mmx)
+      p->caps |= X86_MMX;
+   if(util_cpu_caps.has_mmx2)
+      p->caps |= X86_MMX2;
+   if(util_cpu_caps.has_sse)
+      p->caps |= X86_SSE;
+   if(util_cpu_caps.has_sse2)
+      p->caps |= X86_SSE2;
+   if(util_cpu_caps.has_sse3)
+      p->caps |= X86_SSE3;
+   if(util_cpu_caps.has_sse4_1)
+      p->caps |= X86_SSE4_1;
+   p->csr = p->store;
+   DUMP_START();
+}
 
 void x86_init_func( struct x86_function *p )
 {
    p->size = 0;
    p->store = NULL;
-   p->csr = p->store;
-   DUMP_START();
+   x86_init_func_common(p);
 }
 
 void x86_init_func_size( struct x86_function *p, unsigned code_size )
@@ -1742,8 +2169,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size )
    if (p->store == NULL) {
       p->store = p->error_overflow;
    }
-   p->csr = p->store;
-   DUMP_START();
+   x86_init_func_common(p);
 }
 
 void x86_release_func( struct x86_function *p )
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 365dec109e..aa77892b2d 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -26,20 +26,28 @@
 
 #include "pipe/p_config.h"
 
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 
 /* It is up to the caller to ensure that instructions issued are
  * suitable for the host cpu.  There are no checks made in this module
  * for mmx/sse/sse2 support on the cpu.
  */
 struct x86_reg {
-   unsigned file:3;
-   unsigned idx:3;
+   unsigned file:2;
+   unsigned idx:4;
    unsigned mod:2;		/* mod_REG if this is just a register */
    int      disp:24;		/* only +/- 23bits of offset - should be enough... */
 };
 
+#define X86_MMX 1
+#define X86_MMX2 2
+#define X86_SSE 4
+#define X86_SSE2 8
+#define X86_SSE3 0x10
+#define X86_SSE4_1 0x20
+
 struct x86_function {
+   unsigned caps;
    unsigned size;
    unsigned char *store;
    unsigned char *csr;
@@ -75,7 +83,15 @@ enum x86_reg_name {
    reg_SP,
    reg_BP,
    reg_SI,
-   reg_DI
+   reg_DI,
+   reg_R8,
+   reg_R9,
+   reg_R10,
+   reg_R11,
+   reg_R12,
+   reg_R13,
+   reg_R14,
+   reg_R15
 };
 
 
@@ -110,6 +126,29 @@ typedef void (*x86_func)(void);
 /* Begin/end/retrieve function creation:
  */
 
+enum x86_target
+{
+   X86_32,
+   X86_64_STD_ABI,
+   X86_64_WIN64_ABI
+};
+
+/* make this read a member of x86_function if target != host is desired */
+static INLINE enum x86_target x86_target( struct x86_function* p )
+{
+#ifdef PIPE_ARCH_X86
+   return X86_32;
+#elif defined(_WIN64)
+   return X86_64_WIN64_ABI;
+#elif defined(PIPE_ARCH_X86_64)
+   return X86_64_STD_ABI;
+#endif
+}
+
+static INLINE unsigned x86_target_caps( struct x86_function* p )
+{
+   return p->caps;
+}
 
 void x86_init_func( struct x86_function *p );
 void x86_init_func_size( struct x86_function *p, unsigned code_size );
@@ -138,6 +177,8 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
  */
 int x86_get_label( struct x86_function *p );
 
+void x64_rexw(struct x86_function *p);
+
 void x86_jcc( struct x86_function *p,
 	      enum x86_cc cc,
 	      int label );
@@ -178,18 +219,54 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
 void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
 void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                   unsigned char shuf );
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+                  unsigned char shuf );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+                  unsigned char shuf );
 void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 
+void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
 
 void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
 void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
@@ -227,7 +304,6 @@ void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg
 void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
-void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
 
 void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -237,8 +313,14 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
 void x86_inc( struct x86_function *p, struct x86_reg reg );
 void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
-void x86_movzx8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
-void x86_movzx16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
+void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
+void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
 void x86_mul( struct x86_function *p, struct x86_reg src );
 void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -252,7 +334,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void x86_sahf( struct x86_function *p );
 void x86_div( struct x86_function *p, struct x86_reg src );
-
+void x86_bswap( struct x86_function *p, struct x86_reg src );
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm  );
 
 void x86_cdecl_caller_push_regs( struct x86_function *p );
 void x86_cdecl_caller_pop_regs( struct x86_function *p );
-- 
cgit v1.2.3


From c2da8e77023325f46dde2009def2947b1a687c7b Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Tue, 10 Aug 2010 02:14:04 +0200
Subject: translate_sse: major rewrite (v5)

NOTE: Win64 is untested, and is thus currently disabled.
If you have such a system, please enable it and report whether it works.
To enable it, remove the !defined(_WIN64) check in src/gallium/auxiliary/translate/translate.c.

Changes in v5:
- On Win64, preserve %xmm6 and %xmm7 as required by the ABI
- Use _WIN64 instead of WIN64

Changes in v4:
- Use x86_target() and x86_target_caps()
- Enable translate_sse in x86-64, but not in Win64

Changes in v3:
- Win64 support (untested)
- Use u_cpu_detect.h constants instead of #ifs

Changes in v2:
- Minimize #ifs
- Give a name to magic number CHANNELS_0001
- Add support for CPUs without SSE (only memcpy and swizzles, like non SSE2)
- Fixed comments

translate_sse is currently so limited that it is
useless in essentially all cases.

In particular, it only supports some float32 and unorm8
formats and doesn't work on x86-64.

This commit rewrites it to support:
1. Dumb memory copy for any pair of identical formats
2. All formats that are swizzles of each other
3. Converting 32/64-bit floats and all 8/16/32-bit integers to 32-bit float
4. Converting unorm8/snorm8 to snorm16 and uscaled8/sscaled8 to sscaled16
5. Support for x86-64 (doesn't take advantage of it in any way though)

This new translate can even be useful to translate index buffers for
cards that lack 8-bit index support.

It passes the testsuite I wrote, but note that this is a major change, and more
testing would be great.
---
 src/gallium/auxiliary/translate/translate.c     |    3 +-
 src/gallium/auxiliary/translate/translate_sse.c | 1172 ++++++++++++++++++-----
 2 files changed, 936 insertions(+), 239 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index fe638e211f..03a7f050aa 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -38,7 +38,8 @@ struct translate *translate_create( const struct translate_key *key )
 {
    struct translate *translate = NULL;
 
-#if defined(PIPE_ARCH_X86)
+/* TODO: enable Win64 once it has actually been tested */
+#if defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(_WIN64))
    translate = translate_sse2_create( key );
    if (translate)
       return translate;
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index f9aab9232c..c06197c5d6 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -30,11 +30,12 @@
 #include "pipe/p_compiler.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_format.h"
 
 #include "translate.h"
 
 
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 
 #include "rtasm/rtasm_cpu.h"
 #include "rtasm/rtasm_x86sse.h"
@@ -48,7 +49,7 @@
 
 struct translate_buffer {
    const void *base_ptr;
-   unsigned stride;
+   uintptr_t stride;
    unsigned max_index;
 };
 
@@ -72,12 +73,10 @@ struct translate_sse {
    struct x86_function *func;
 
    boolean loaded_identity;
-   boolean loaded_255;
-   boolean loaded_inv_255;
+   boolean loaded_const[5];
 
    float identity[4];
-   float float_255[4];
-   float inv_255[4];
+   float const_value[5][4];
 
    struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
    unsigned nr_buffers;
@@ -96,10 +95,12 @@ struct translate_sse {
     * like this is helpful to keep them in sync across the file.
     */
    struct x86_reg tmp_EAX;
-   struct x86_reg idx_EBX;     /* either start+i or &elt[i] */
-   struct x86_reg outbuf_ECX;
-   struct x86_reg machine_EDX;
-   struct x86_reg count_ESI;    /* decrements to zero */
+   struct x86_reg tmp2_EDX;
+   struct x86_reg tmp3_ECX;
+   struct x86_reg idx_ESI;     /* either start+i or &elt[i] */
+   struct x86_reg machine_EDI;
+   struct x86_reg outbuf_EBX;
+   struct x86_reg count_EBP;    /* decrements to zero */
 };
 
 static int get_offset( const void *a, const void *b )
@@ -111,7 +112,7 @@ static int get_offset( const void *a, const void *b )
 
 static struct x86_reg get_identity( struct translate_sse *p )
 {
-   struct x86_reg reg = x86_make_reg(file_XMM, 6);
+   struct x86_reg reg = x86_make_reg(file_XMM, 7);
 
    if (!p->loaded_identity) {
       p->loaded_identity = TRUE;
@@ -121,253 +122,910 @@ static struct x86_reg get_identity( struct translate_sse *p )
       p->identity[3] = 1;
 
       sse_movups(p->func, reg, 
-		 x86_make_disp(p->machine_EDX, 
+		 x86_make_disp(p->machine_EDI,
 			       get_offset(p, &p->identity[0])));
    }
 
    return reg;
 }
 
-static struct x86_reg get_255( struct translate_sse *p )
+static struct x86_reg get_const( struct translate_sse *p, unsigned i, float v)
 {
-   struct x86_reg reg = x86_make_reg(file_XMM, 7);
-
-   if (!p->loaded_255) {
-      p->loaded_255 = TRUE;
-      p->float_255[0] =
-	 p->float_255[1] =
-	 p->float_255[2] =
-	 p->float_255[3] = 255.0f;
-
-      sse_movups(p->func, reg, 
-		 x86_make_disp(p->machine_EDX, 
-			       get_offset(p, &p->float_255[0])));
+   struct x86_reg reg = x86_make_reg(file_XMM, 2 + i);
+
+   if (!p->loaded_const[i]) {
+      p->loaded_const[i] = TRUE;
+      p->const_value[i][0] =
+         p->const_value[i][1] =
+         p->const_value[i][2] =
+         p->const_value[i][3] = v;
+
+      sse_movups(p->func, reg,
+                 x86_make_disp(p->machine_EDI,
+                               get_offset(p, &p->const_value[i][0])));
    }
 
    return reg;
 }
 
-static struct x86_reg get_inv_255( struct translate_sse *p )
+static struct x86_reg get_inv_127( struct translate_sse *p )
 {
-   struct x86_reg reg = x86_make_reg(file_XMM, 5);
-
-   if (!p->loaded_inv_255) {
-      p->loaded_inv_255 = TRUE;
-      p->inv_255[0] =
-	 p->inv_255[1] =
-	 p->inv_255[2] =
-	 p->inv_255[3] = 1.0f / 255.0f;
-
-      sse_movups(p->func, reg, 
-		 x86_make_disp(p->machine_EDX, 
-			       get_offset(p, &p->inv_255[0])));
-   }
-
-   return reg;
+   return get_const(p, 0, 1.0f / 127.0f);
 }
 
-
-static void emit_load_R32G32B32A32( struct translate_sse *p, 			   
-				    struct x86_reg data,
-				    struct x86_reg arg0 )
+static struct x86_reg get_inv_255( struct translate_sse *p )
 {
-   sse_movups(p->func, data, arg0);
+   return get_const(p, 1, 1.0f / 255.0f);
 }
 
-static void emit_load_R32G32B32( struct translate_sse *p, 			   
-				 struct x86_reg data,
-				 struct x86_reg arg0 )
+static struct x86_reg get_inv_32767( struct translate_sse *p )
 {
-   /* Have to jump through some hoops:
-    *
-    * c 0 0 0
-    * c 0 0 1
-    * 0 0 c 1
-    * a b c 1
-    */
-   sse_movss(p->func, data, x86_make_disp(arg0, 8));
-   sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
-   sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
-   sse_movlps(p->func, data, arg0);
+   return get_const(p, 2, 1.0f / 32767.0f);
 }
 
-static void emit_load_R32G32( struct translate_sse *p, 
-			   struct x86_reg data,
-			   struct x86_reg arg0 )
+static struct x86_reg get_inv_65535( struct translate_sse *p )
 {
-   /* 0 0 0 1
-    * a b 0 1
-    */
-   sse_movups(p->func, data, get_identity(p) );
-   sse_movlps(p->func, data, arg0);
+   return get_const(p, 3, 1.0f / 65535.0f);
 }
 
-
-static void emit_load_R32( struct translate_sse *p, 
-			   struct x86_reg data,
-			   struct x86_reg arg0 )
+static struct x86_reg get_inv_2147483647( struct translate_sse *p )
 {
-   /* a 0 0 0
-    * a 0 0 1
-    */
-   sse_movss(p->func, data, arg0);
-   sse_orps(p->func, data, get_identity(p) );
+   return get_const(p, 4, 1.0f / 2147483647.0f);
 }
 
-
-static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
+/* load the data in a SSE2 register, padding with zeros */
+static boolean emit_load_sse2( struct translate_sse *p,
 				       struct x86_reg data,
-				       struct x86_reg src )
+				       struct x86_reg src,
+				       unsigned size)
 {
-
-   /* Load and unpack twice:
-    */
-   sse_movss(p->func, data, src);
-   sse2_punpcklbw(p->func, data, get_identity(p));
-   sse2_punpcklbw(p->func, data, get_identity(p));
-
-   /* Convert to float:
-    */
-   sse2_cvtdq2ps(p->func, data, data);
-
-
-   /* Scale by 1/255.0
-    */
-   sse_mulps(p->func, data, get_inv_255(p));
+   struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+   struct x86_reg tmp = p->tmp_EAX;
+   switch(size)
+   {
+   case 1:
+      x86_movzx8(p->func, tmp, src);
+      sse2_movd(p->func, data, tmp);
+      break;
+   case 2:
+      x86_movzx16(p->func, tmp, src);
+      sse2_movd(p->func, data, tmp);
+   case 3:
+      x86_movzx8(p->func, tmp, x86_make_disp(src, 2));
+      x86_shl_imm(p->func, tmp, 16);
+      x86_mov16(p->func, tmp, src);
+      sse2_movd(p->func, data, tmp);
+   case 4:
+      sse2_movd(p->func, data, src);
+      break;
+   case 6:
+      sse2_movd(p->func, data, src);
+      x86_movzx16(p->func, tmp, x86_make_disp(src, 4));
+      sse2_movd(p->func, tmpXMM, tmp);
+      sse2_punpckldq(p->func, data, tmpXMM);
+      break;
+   case 8:
+      sse2_movq(p->func, data, src);
+      break;
+   case 12:
+      sse2_movq(p->func, data, src);
+      sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8));
+      sse2_punpcklqdq(p->func, data, tmpXMM);
+      break;
+   case 16:
+      sse2_movdqu(p->func, data, src);
+      break;
+   default:
+      return FALSE;
+   }
+   return TRUE;
 }
 
+/* this value can be passed for the out_chans argument */
+#define CHANNELS_0001 5
 
+/* this function will load #chans float values, and will
+ * pad the register with zeroes at least up to out_chans.
+ *
+ * If out_chans is set to CHANNELS_0001, then the fourth
+ * value will be padded with 1. Only pass this value if
+ * chans < 4 or results are undefined.
+ */
+static void emit_load_float32( struct translate_sse *p,
+                                       struct x86_reg data,
+                                       struct x86_reg arg0,
+                                       unsigned out_chans,
+                                       unsigned chans)
+{
+   switch(chans)
+   {
+   case 1:
+      /* a 0 0 0
+       * a 0 0 1
+       */
+      sse_movss(p->func, data, arg0);
+      if(out_chans == CHANNELS_0001)
+         sse_orps(p->func, data, get_identity(p) );
+      break;
+   case 2:
+      /* 0 0 0 1
+       * a b 0 1
+       */
+      if(out_chans == CHANNELS_0001)
+         sse_shufps(p->func, data, get_identity(p), SHUF(X, Y, Z, W) );
+      else if(out_chans > 2)
+         sse_movlhps(p->func, data, get_identity(p) );
+      sse_movlps(p->func, data, arg0);
+      break;
+   case 3:
+      /* Have to jump through some hoops:
+       *
+       * c 0 0 0
+       * c 0 0 1 if out_chans == CHANNELS_0001
+       * 0 0 c 0/1
+       * a b c 0/1
+       */
+      sse_movss(p->func, data, x86_make_disp(arg0, 8));
+      if(out_chans == CHANNELS_0001)
+         sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
+      sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
+      sse_movlps(p->func, data, arg0);
+      break;
+   case 4:
+      sse_movups(p->func, data, arg0);
+      break;
+   }
+}
 
+/* this function behaves like emit_load_float32, but loads
+   64-bit floating point numbers, converting them to 32-bit
+  ones */
+static void emit_load_float64to32( struct translate_sse *p,
+                                       struct x86_reg data,
+                                       struct x86_reg arg0,
+                                       unsigned out_chans,
+                                       unsigned chans)
+{
+   struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+   switch(chans)
+   {
+   case 1:
+      sse2_movsd(p->func, data, arg0);
+      if(out_chans > 1)
+         sse2_cvtpd2ps(p->func, data, data);
+      else
+         sse2_cvtsd2ss(p->func, data, data);
+      if(out_chans == CHANNELS_0001)
+         sse_shufps(p->func, data, get_identity(p), SHUF(X, Y, Z, W)  );
+      break;
+   case 2:
+      sse2_movupd(p->func, data, arg0);
+      sse2_cvtpd2ps(p->func, data, data);
+      if(out_chans == CHANNELS_0001)
+         sse_shufps(p->func, data, get_identity(p), SHUF(X, Y, Z, W) );
+      else if(out_chans > 2)
+         sse_movlhps(p->func, data, get_identity(p) );
+       break;
+   case 3:
+      sse2_movupd(p->func, data, arg0);
+      sse2_cvtpd2ps(p->func, data, data);
+      sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+      if(out_chans > 3)
+         sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+      else
+         sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM);
+      sse_movlhps(p->func, data, tmpXMM);
+      if(out_chans == CHANNELS_0001)
+         sse_orps(p->func, data, get_identity(p) );
+      break;
+   case 4:
+      sse2_movupd(p->func, data, arg0);
+      sse2_cvtpd2ps(p->func, data, data);
+      sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+      sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+      sse_movlhps(p->func, data, tmpXMM);
+      break;
+   }
+}
 
-static void emit_store_R32G32B32A32( struct translate_sse *p, 			   
-				     struct x86_reg dest,
-				     struct x86_reg dataXMM )
+static void emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src_gpr,  struct x86_reg src_xmm)
 {
-   sse_movups(p->func, dest, dataXMM);
+   if(x86_target(p->func) != X86_32)
+      x64_mov64(p->func, dst_gpr, src_gpr);
+   else
+   {
+      /* TODO: when/on which CPUs is SSE2 actually better than SSE? */
+      if(x86_target_caps(p->func) & X86_SSE2)
+         sse2_movq(p->func, dst_xmm, src_xmm);
+      else
+         sse_movlps(p->func, dst_xmm, src_xmm);
+   }
 }
 
-static void emit_store_R32G32B32( struct translate_sse *p, 
-				  struct x86_reg dest,
-				  struct x86_reg dataXMM )
+static void emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src)
 {
-   /* Emit two, shuffle, emit one.
-    */
-   sse_movlps(p->func, dest, dataXMM);
-   sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
-   sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
+   emit_mov64(p, dst_gpr, dst_xmm, src, src);
 }
 
-static void emit_store_R32G32( struct translate_sse *p, 
-			       struct x86_reg dest,
-			       struct x86_reg dataXMM )
+static void emit_store64(struct translate_sse *p, struct x86_reg dst, struct x86_reg src_gpr, struct x86_reg src_xmm)
 {
-   sse_movlps(p->func, dest, dataXMM);
+   emit_mov64(p, dst, dst, src_gpr, src_xmm);
 }
 
-static void emit_store_R32( struct translate_sse *p, 
-			    struct x86_reg dest,
-			    struct x86_reg dataXMM )
+static void emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src)
 {
-   sse_movss(p->func, dest, dataXMM);
+   if(x86_target_caps(p->func) & X86_SSE2)
+      sse2_movdqu(p->func, dst, src);
+   else
+      sse_movups(p->func, dst, src);
 }
 
+/* TODO: this uses unaligned accesses liberally, which is great on Nehalem,
+ * but may or may not be good on older processors
+ * TODO: may perhaps want to use non-temporal stores here if possible
+ */
+static void emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, unsigned size)
+{
+   struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+   struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1);
+   struct x86_reg dataGPR = p->tmp_EAX;
+   struct x86_reg dataGPR2 = p->tmp2_EDX;
 
+   if(size < 8)
+   {
+      switch (size)
+      {
+      case 1:
+         x86_mov8(p->func, dataGPR, src);
+         x86_mov8(p->func, dst, dataGPR);
+         break;
+      case 2:
+         x86_mov16(p->func, dataGPR, src);
+         x86_mov16(p->func, dst, dataGPR);
+         break;
+      case 3:
+         x86_mov16(p->func, dataGPR, src);
+         x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2));
+         x86_mov16(p->func, dst, dataGPR);
+         x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2);
+         break;
+      case 4:
+         x86_mov(p->func, dataGPR, src);
+         x86_mov(p->func, dst, dataGPR);
+         break;
+      case 6:
+         x86_mov(p->func, dataGPR, src);
+         x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4));
+         x86_mov(p->func, dst, dataGPR);
+         x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2);
+         break;
+      }
+   }
+   else if(!(x86_target_caps(p->func) & X86_SSE))
+   {
+      unsigned i = 0;
+      assert((size & 3) == 0);
+      for(i = 0; i < size; i += 4)
+      {
+         x86_mov(p->func, dataGPR, x86_make_disp(src, i));
+         x86_mov(p->func, x86_make_disp(dst, i), dataGPR);
+      }
+   }
+   else
+   {
+      switch(size)
+      {
+      case 8:
+         emit_load64(p, dataGPR, dataXMM, src);
+         emit_store64(p, dst, dataGPR, dataXMM);
+         break;
+      case 12:
+         emit_load64(p, dataGPR2, dataXMM, src);
+         x86_mov(p->func, dataGPR, x86_make_disp(src, 8));
+         emit_store64(p, dst, dataGPR2, dataXMM);
+         x86_mov(p->func, x86_make_disp(dst, 8), dataGPR);
+         break;
+      case 16:
+         emit_mov128(p, dataXMM, src);
+         emit_mov128(p, dst, dataXMM);
+         break;
+      case 24:
+         emit_mov128(p, dataXMM, src);
+         emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16));
+         emit_mov128(p, dst, dataXMM);
+         emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2);
+         break;
+      case 32:
+         emit_mov128(p, dataXMM, src);
+         emit_mov128(p, dataXMM2, x86_make_disp(src, 16));
+         emit_mov128(p, dst, dataXMM);
+         emit_mov128(p, x86_make_disp(dst, 16), dataXMM2);
+         break;
+      default:
+         assert(0);
+      }
+   }
+}
+
+static boolean translate_attr_convert( struct translate_sse *p,
+                               const struct translate_element *a,
+                               struct x86_reg src,
+                               struct x86_reg dst)
 
-static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
-				       struct x86_reg dest,
-				       struct x86_reg dataXMM )
 {
-   /* Scale by 255.0
-    */
-   sse_mulps(p->func, dataXMM, get_255(p));
+   const struct util_format_description* input_desc = util_format_description(a->input_format);
+   const struct util_format_description* output_desc = util_format_description(a->output_format);
+   unsigned i;
+   boolean id_swizzle = TRUE;
+   unsigned swizzle[4] = {UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE};
+   unsigned needed_chans = 0;
+   unsigned imms[2] = {0, 0x3f800000};
 
-   /* Pack and emit:
-    */
-   sse2_cvtps2dq(p->func, dataXMM, dataXMM);
-   sse2_packssdw(p->func, dataXMM, dataXMM);
-   sse2_packuswb(p->func, dataXMM, dataXMM);
-   sse_movss(p->func, dest, dataXMM);
-}
+   if(a->output_format == PIPE_FORMAT_NONE || a->input_format == PIPE_FORMAT_NONE)
+      return FALSE;
 
+   if(input_desc->channel[0].size & 7)
+      return FALSE;
 
+   if(input_desc->colorspace != output_desc->colorspace)
+      return FALSE;
 
+   for(i = 1; i < input_desc->nr_channels; ++i)
+   {
+      if(memcmp(&input_desc->channel[i], &input_desc->channel[0], sizeof(input_desc->channel[0])))
+         return FALSE;
+   }
 
+   for(i = 1; i < output_desc->nr_channels; ++i)
+   {
+      if(memcmp(&output_desc->channel[i], &output_desc->channel[0], sizeof(output_desc->channel[0])))
+         return FALSE;
+   }
 
-/* Extended swizzles?  Maybe later.
- */  
-static void emit_swizzle( struct translate_sse *p,
-			  struct x86_reg dest,
-			  struct x86_reg src,
-			  unsigned char shuffle )
-{
-   sse_shufps(p->func, dest, src, shuffle);
-}
+   for(i = 0; i < output_desc->nr_channels; ++i)
+   {
+      if(output_desc->swizzle[i] < 4)
+         swizzle[output_desc->swizzle[i]] = input_desc->swizzle[i];
+   }
 
+   if((x86_target_caps(p->func) & X86_SSE) && (0
+         || a->output_format == PIPE_FORMAT_R32_FLOAT
+         || a->output_format == PIPE_FORMAT_R32G32_FLOAT
+         || a->output_format == PIPE_FORMAT_R32G32B32_FLOAT
+         || a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT))
+   {
+      struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+      struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
 
-static boolean translate_attr( struct translate_sse *p,
-			       const struct translate_element *a,
-			       struct x86_reg srcECX,
-			       struct x86_reg dstEAX)
-{
-   struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+      for(i = 0; i < output_desc->nr_channels; ++i)
+      {
+         if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+            swizzle[i] = i;
+      }
 
-   switch (a->input_format) {
-   case PIPE_FORMAT_R32_FLOAT:
-      emit_load_R32(p, dataXMM, srcECX);
-      break;
-   case PIPE_FORMAT_R32G32_FLOAT:
-      emit_load_R32G32(p, dataXMM, srcECX);
-      break;
-   case PIPE_FORMAT_R32G32B32_FLOAT:
-      emit_load_R32G32B32(p, dataXMM, srcECX);
-      break;
-   case PIPE_FORMAT_R32G32B32A32_FLOAT:
-      emit_load_R32G32B32A32(p, dataXMM, srcECX);
-      break;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
-      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
-      break;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
-      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
-      break;
-   default:
-      return FALSE;
+      for(i = 0; i < output_desc->nr_channels; ++i)
+      {
+         if(swizzle[i] < 4)
+            needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+         if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+            id_swizzle = FALSE;
+      }
+
+      if(needed_chans > 0)
+      {
+         switch(input_desc->channel[0].type)
+         {
+         case UTIL_FORMAT_TYPE_UNSIGNED:
+            if(!(x86_target_caps(p->func) & X86_SSE2))
+               return FALSE;
+            emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+            /* TODO: add support for SSE4.1 pmovzx */
+            switch(input_desc->channel[0].size)
+            {
+            case 8:
+               /* TODO: this may be inefficient due to get_identity() being used both as a float and integer register */
+               sse2_punpcklbw(p->func, dataXMM, get_identity(p));
+               sse2_punpcklbw(p->func, dataXMM, get_identity(p));
+               break;
+            case 16:
+               sse2_punpcklwd(p->func, dataXMM, get_identity(p));
+               break;
+            case 32: /* we lose precision here */
+               sse2_psrld_imm(p->func, dataXMM, 1);
+               break;
+            default:
+               return FALSE;
+            }
+            sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+            if(input_desc->channel[0].normalized)
+            {
+               struct x86_reg factor;
+               switch(input_desc->channel[0].size)
+               {
+               case 8:
+                  factor = get_inv_255(p);
+                  break;
+               case 16:
+                  factor = get_inv_65535(p);
+                  break;
+               case 32:
+                  factor = get_inv_2147483647(p);
+                  break;
+               }
+               sse_mulps(p->func, dataXMM, factor);
+            }
+            else if(input_desc->channel[0].size == 32)
+               sse_addps(p->func, dataXMM, dataXMM); /* compensate for the bit we threw away to fit u32 into s32 */
+            break;
+         case UTIL_FORMAT_TYPE_SIGNED:
+            if(!(x86_target_caps(p->func) & X86_SSE2))
+               return FALSE;
+            emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+            /* TODO: add support for SSE4.1 pmovsx */
+            switch(input_desc->channel[0].size)
+            {
+            case 8:
+               sse2_punpcklbw(p->func, dataXMM, dataXMM);
+               sse2_punpcklbw(p->func, dataXMM, dataXMM);
+               sse2_psrad_imm(p->func, dataXMM, 24);
+               break;
+            case 16:
+               sse2_punpcklwd(p->func, dataXMM, dataXMM);
+               sse2_psrad_imm(p->func, dataXMM, 16);
+               break;
+            case 32: /* we lose precision here */
+               break;
+            default:
+               return FALSE;
+            }
+            sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+            if(input_desc->channel[0].normalized)
+            {
+               struct x86_reg factor;
+               switch(input_desc->channel[0].size)
+               {
+               case 8:
+                  factor = get_inv_127(p);
+                  break;
+               case 16:
+                  factor = get_inv_32767(p);
+                  break;
+               case 32:
+                  factor = get_inv_2147483647(p);
+                  break;
+               }
+               sse_mulps(p->func, dataXMM, factor);
+            }
+            break;
+
+            break;
+         case UTIL_FORMAT_TYPE_FLOAT:
+            if(input_desc->channel[0].size != 32 && input_desc->channel[0].size != 64)
+               return FALSE;
+            if(swizzle[3] == UTIL_FORMAT_SWIZZLE_1 && input_desc->nr_channels <= 3)
+            {
+               swizzle[3] = UTIL_FORMAT_SWIZZLE_W;
+               needed_chans = CHANNELS_0001;
+            }
+            switch(input_desc->channel[0].size)
+            {
+            case 32:
+               emit_load_float32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+               break;
+            case 64: /* we lose precision here */
+               if(!(x86_target_caps(p->func) & X86_SSE2))
+                  return FALSE;
+               emit_load_float64to32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+               break;
+            default:
+               return FALSE;
+            }
+            break;
+         default:
+            return FALSE;
+         }
+
+         if(!id_swizzle)
+            sse_shufps(p->func, dataXMM, dataXMM, SHUF(swizzle[0], swizzle[1], swizzle[2], swizzle[3]) );
+      }
+
+      if(output_desc->nr_channels >= 4
+            && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+            && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+            && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+            && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+            )
+         sse_movups(p->func, dst, dataXMM);
+      else
+      {
+         if(output_desc->nr_channels >= 2
+               && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+               && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+            sse_movlps(p->func, dst, dataXMM);
+         else
+         {
+            if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+               sse_movss(p->func, dst, dataXMM);
+            else
+               x86_mov_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+
+            if(output_desc->nr_channels >= 2)
+            {
+               if(swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+               {
+                  sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3));
+                  sse_movss(p->func, x86_make_disp(dst, 4), dataXMM);
+               }
+               else
+                  x86_mov_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+            }
+         }
+
+         if(output_desc->nr_channels >= 3)
+         {
+            if(output_desc->nr_channels >= 4
+                  && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+                  && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+               sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM);
+            else
+            {
+               if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+               {
+                  sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3));
+                  sse_movss(p->func, x86_make_disp(dst, 8), dataXMM);
+               }
+               else
+                  x86_mov_imm(p->func, x86_make_disp(dst, 8), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+               if(output_desc->nr_channels >= 4)
+               {
+                  if(swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+                  {
+                     sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3));
+                     sse_movss(p->func, x86_make_disp(dst, 12), dataXMM);
+                  }
+                  else
+                     x86_mov_imm(p->func, x86_make_disp(dst, 12), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+               }
+            }
+         }
+      }
+      return TRUE;
    }
+   else if((x86_target_caps(p->func) & X86_SSE2) && input_desc->channel[0].size == 8 && output_desc->channel[0].size == 16
+         && output_desc->channel[0].normalized == input_desc->channel[0].normalized
+         && (0
+               || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
+               || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+               || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+               ))
+   {
+      struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+      struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+      struct x86_reg tmp = p->tmp_EAX;
+      unsigned imms[2] = {0, 1};
 
-   switch (a->output_format) {
-   case PIPE_FORMAT_R32_FLOAT:
-      emit_store_R32(p, dstEAX, dataXMM);
-      break;
-   case PIPE_FORMAT_R32G32_FLOAT:
-      emit_store_R32G32(p, dstEAX, dataXMM);
-      break;
-   case PIPE_FORMAT_R32G32B32_FLOAT:
-      emit_store_R32G32B32(p, dstEAX, dataXMM);
-      break;
-   case PIPE_FORMAT_R32G32B32A32_FLOAT:
-      emit_store_R32G32B32A32(p, dstEAX, dataXMM);
-      break;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
-      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
-      break;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
-      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
-      break;
-   default:
-      return FALSE;
+      for(i = 0; i < output_desc->nr_channels; ++i)
+      {
+         if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+            swizzle[i] = i;
+      }
+
+      for(i = 0; i < output_desc->nr_channels; ++i)
+      {
+         if(swizzle[i] < 4)
+            needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+         if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+            id_swizzle = FALSE;
+      }
+
+      if(needed_chans > 0)
+      {
+         emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+         switch(input_desc->channel[0].type)
+         {
+         case UTIL_FORMAT_TYPE_UNSIGNED:
+            if(input_desc->channel[0].normalized)
+            {
+               sse2_punpcklbw(p->func, dataXMM, dataXMM);
+               if(output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+        	       sse2_psrlw_imm(p->func, dataXMM, 1);
+            }
+            else
+               sse2_punpcklbw(p->func, dataXMM, get_identity(p));
+            break;
+         case UTIL_FORMAT_TYPE_SIGNED:
+            if(input_desc->channel[0].normalized)
+            {
+               sse2_movq(p->func, tmpXMM, get_identity(p));
+               sse2_punpcklbw(p->func, tmpXMM, dataXMM);
+               sse2_psllw_imm(p->func, dataXMM, 9);
+               sse2_psrlw_imm(p->func, dataXMM, 8);
+               sse2_por(p->func, tmpXMM, dataXMM);
+               sse2_psrlw_imm(p->func, dataXMM, 7);
+               sse2_por(p->func, tmpXMM, dataXMM);
+               {
+                  struct x86_reg t = dataXMM;
+                  dataXMM = tmpXMM;
+                  tmpXMM = t;
+               }
+            }
+            else
+            {
+               sse2_punpcklbw(p->func, dataXMM, dataXMM);
+               sse2_psraw_imm(p->func, dataXMM, 8);
+            }
+            break;
+         default:
+            assert(0);
+         }
+
+         if(output_desc->channel[0].normalized)
+            imms[1] = (output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) ? 0xffff : 0x7ffff;
+
+         if(!id_swizzle)
+            sse2_pshuflw(p->func, dataXMM, dataXMM, (swizzle[0] & 3) | ((swizzle[1] & 3) << 2) | ((swizzle[2] & 3) << 4) | ((swizzle[3] & 3) << 6));
+      }
+
+      if(output_desc->nr_channels >= 4
+            && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+            && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+            && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+            && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+            )
+         sse2_movq(p->func, dst, dataXMM);
+      else
+      {
+         if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+         {
+            if(output_desc->nr_channels >= 2 && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+               sse2_movd(p->func, dst, dataXMM);
+            else
+            {
+               sse2_movd(p->func, tmp, dataXMM);
+               x86_mov16(p->func, dst, tmp);
+               if(output_desc->nr_channels >= 2)
+                  x86_mov16_imm(p->func, x86_make_disp(dst, 2), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+            }
+         }
+         else
+         {
+            if(output_desc->nr_channels >= 2 && swizzle[1] >= UTIL_FORMAT_SWIZZLE_0)
+               x86_mov_imm(p->func, dst, (imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+            else
+            {
+               x86_mov16_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+               if(output_desc->nr_channels >= 2)
+               {
+                  sse2_movd(p->func, tmp, dataXMM);
+                  x86_shr_imm(p->func, tmp, 16);
+                  x86_mov16(p->func, x86_make_disp(dst, 2), tmp);
+               }
+            }
+         }
+
+         if(output_desc->nr_channels >= 3)
+         {
+            if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+            {
+               if(output_desc->nr_channels >= 4 && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+               {
+                  sse2_psrlq_imm(p->func, dataXMM, 32);
+                  sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM);
+               }
+               else
+               {
+                  sse2_psrlq_imm(p->func, dataXMM, 32);
+                  sse2_movd(p->func, tmp, dataXMM);
+                  x86_mov16(p->func, x86_make_disp(dst, 4), tmp);
+                  if(output_desc->nr_channels >= 4)
+                  {
+                     x86_mov16_imm(p->func, x86_make_disp(dst, 6), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+                  }
+               }
+            }
+            else
+            {
+               if(output_desc->nr_channels >= 4 && swizzle[3] >= UTIL_FORMAT_SWIZZLE_0)
+                  x86_mov_imm(p->func, x86_make_disp(dst, 4), (imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+               else
+               {
+                  x86_mov16_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+                  if(output_desc->nr_channels >= 4)
+                  {
+                     sse2_psrlq_imm(p->func, dataXMM, 48);
+                     sse2_movd(p->func, tmp, dataXMM);
+                     x86_mov16(p->func, x86_make_disp(dst, 6), tmp);
+                  }
+               }
+            }
+         }
+      }
+      return TRUE;
    }
+   else if(!memcmp(&output_desc->channel[0], &input_desc->channel[0], sizeof(output_desc->channel[0])))
+   {
+      struct x86_reg tmp = p->tmp_EAX;
+      if(input_desc->channel[0].size == 8 && input_desc->nr_channels == 4 && output_desc->nr_channels == 4
+                     && swizzle[0] == UTIL_FORMAT_SWIZZLE_W
+                     && swizzle[1] == UTIL_FORMAT_SWIZZLE_Z
+                     && swizzle[2] == UTIL_FORMAT_SWIZZLE_Y
+                     && swizzle[3] == UTIL_FORMAT_SWIZZLE_X)
+      {
+         /* TODO: support movbe */
+         x86_mov(p->func, tmp, src);
+         x86_bswap(p->func, tmp);
+         x86_mov(p->func, dst, tmp);
+         return TRUE;
+      }
 
-   return TRUE;
+      for(unsigned i = 0; i < output_desc->nr_channels; ++i)
+      {
+         switch(output_desc->channel[0].size)
+         {
+         case 8:
+            if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+            {
+               unsigned v = 0;
+               if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+               {
+                  switch(output_desc->channel[0].type)
+                  {
+                  case UTIL_FORMAT_TYPE_UNSIGNED:
+                     v = output_desc->channel[0].normalized ? 0xff : 1;
+                     break;
+                  case UTIL_FORMAT_TYPE_SIGNED:
+                     v = output_desc->channel[0].normalized ? 0x7f : 1;
+                     break;
+                  default:
+                     return FALSE;
+                  }
+               }
+               x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v);
+            }
+            else
+            {
+               x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1));
+               x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp);
+            }
+            break;
+         case 16:
+            if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+            {
+               unsigned v = 0;
+               if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+               {
+                  switch(output_desc->channel[1].type)
+                  {
+                  case UTIL_FORMAT_TYPE_UNSIGNED:
+                     v = output_desc->channel[1].normalized ? 0xffff : 1;
+                     break;
+                  case UTIL_FORMAT_TYPE_SIGNED:
+                     v = output_desc->channel[1].normalized ? 0x7fff : 1;
+                     break;
+                  case UTIL_FORMAT_TYPE_FLOAT:
+                     v = 0x3c00;
+                     break;
+                  default:
+                     return FALSE;
+                  }
+               }
+               x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v);
+            }
+            else if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0)
+               x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0);
+            else
+            {
+               x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2));
+               x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp);
+            }
+            break;
+         case 32:
+            if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+            {
+               unsigned v = 0;
+               if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+               {
+                  switch(output_desc->channel[1].type)
+                  {
+                  case UTIL_FORMAT_TYPE_UNSIGNED:
+                     v = output_desc->channel[1].normalized ? 0xffffffff : 1;
+                     break;
+                  case UTIL_FORMAT_TYPE_SIGNED:
+                     v = output_desc->channel[1].normalized ? 0x7fffffff : 1;
+                     break;
+                  case UTIL_FORMAT_TYPE_FLOAT:
+                     v = 0x3f800000;
+                     break;
+                  default:
+                     return FALSE;
+                  }
+               }
+               x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v);
+            }
+            else
+            {
+               x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4));
+               x86_mov(p->func, x86_make_disp(dst, i * 4), tmp);
+            }
+            break;
+         case 64:
+            if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+            {
+               unsigned l = 0;
+               unsigned h = 0;
+               if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+               {
+                  switch(output_desc->channel[1].type)
+                  {
+                  case UTIL_FORMAT_TYPE_UNSIGNED:
+                     h = output_desc->channel[1].normalized ? 0xffffffff : 0;
+                     l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+                     break;
+                  case UTIL_FORMAT_TYPE_SIGNED:
+                     h = output_desc->channel[1].normalized ? 0x7fffffff : 0;
+                     l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+                     break;
+                  case UTIL_FORMAT_TYPE_FLOAT:
+                     h = 0x3ff00000;
+                     l = 0;
+                     break;
+                  default:
+                     return FALSE;
+                  }
+               }
+               x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l);
+               x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h);
+            }
+            else
+            {
+               if(x86_target_caps(p->func) & X86_SSE)
+               {
+                  struct x86_reg tmpXMM = x86_make_reg(file_XMM, 0);
+                  emit_load64(p, tmp, tmpXMM, x86_make_disp(src, swizzle[i] * 8));
+                  emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM);
+               }
+               else
+               {
+                  x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8));
+                  x86_mov(p->func, x86_make_disp(dst, i * 8), tmp);
+                  x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8 + 4));
+                  x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp);
+               }
+            }
+            break;
+         default:
+            return FALSE;
+         }
+      }
+      return TRUE;
+   }
+   return FALSE;
 }
 
+static boolean translate_attr( struct translate_sse *p,
+			       const struct translate_element *a,
+			       struct x86_reg src,
+			       struct x86_reg dst)
+{  /* Handle one attribute 'a' going from src to dst: plain copy when the formats match, otherwise conversion. */
+   if(a->input_format == a->output_format)  /* identical formats: no per-channel work required */
+   {
+      emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1));  /* length = stride of a 1-wide row of the format */
+      return TRUE;
+   }
+   /* Formats differ: defer to translate_attr_convert and pass its result through (it returns FALSE when no path matches). */
+   return translate_attr_convert(p, a, src, dst);
+}
 
 static boolean init_inputs( struct translate_sse *p,
                             unsigned index_size )
 {
    unsigned i;
-   struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
+   struct x86_reg instance_id = x86_make_disp(p->machine_EDI,
                                               get_offset(p, &p->instance_id));
 
    for (i = 0; i < p->nr_buffer_varients; i++) {
@@ -375,13 +1033,13 @@ static boolean init_inputs( struct translate_sse *p,
       struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
 
       if (!index_size || varient->instance_divisor) {
-         struct x86_reg buf_stride   = x86_make_disp(p->machine_EDX,
+         struct x86_reg buf_stride   = x86_make_disp(p->machine_EDI,
                                                      get_offset(p, &buffer->stride));
-         struct x86_reg buf_ptr      = x86_make_disp(p->machine_EDX,
+         struct x86_reg buf_ptr      = x86_make_disp(p->machine_EDI,
                                                      get_offset(p, &varient->ptr));
-         struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
+         struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI,
                                                      get_offset(p, &buffer->base_ptr));
-         struct x86_reg elt = p->idx_EBX;
+         struct x86_reg elt = p->idx_ESI;
          struct x86_reg tmp_EAX = p->tmp_EAX;
 
          /* Calculate pointer to first attrib:
@@ -393,20 +1051,16 @@ static boolean init_inputs( struct translate_sse *p,
             x86_mov(p->func, tmp_EAX, instance_id);
 
             if (varient->instance_divisor != 1) {
-               struct x86_reg tmp_EDX = p->machine_EDX;
-               struct x86_reg tmp_ECX = p->outbuf_ECX;
+               struct x86_reg tmp_EDX = p->tmp2_EDX;
+               struct x86_reg tmp_ECX = p->tmp3_ECX;
 
                /* TODO: Add x86_shr() to rtasm and use it whenever
                 *       instance divisor is power of two.
                 */
 
-               x86_push(p->func, tmp_EDX);
-               x86_push(p->func, tmp_ECX);
                x86_xor(p->func, tmp_EDX, tmp_EDX);
                x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
                x86_div(p->func, tmp_ECX);    /* EAX = EDX:EAX / ECX */
-               x86_pop(p->func, tmp_ECX);
-               x86_pop(p->func, tmp_EDX);
             }
          } else {
             x86_mov(p->func, tmp_EAX, elt);
@@ -417,6 +1071,7 @@ static boolean init_inputs( struct translate_sse *p,
           */
 
          x86_imul(p->func, tmp_EAX, buf_stride);
+         x64_rexw(p->func);
          x86_add(p->func, tmp_EAX, buf_base_ptr);
 
 
@@ -424,9 +1079,15 @@ static boolean init_inputs( struct translate_sse *p,
           * index number.
           */
          if (!index_size && p->nr_buffer_varients == 1)
+         {
+            x64_rexw(p->func);
             x86_mov(p->func, elt, tmp_EAX);
+         }
          else
+         {
+            x64_rexw(p->func);
             x86_mov(p->func, buf_ptr, tmp_EAX);
+         }
       }
    }
 
@@ -440,18 +1101,19 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
                                       struct x86_reg elt )
 {
    if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
-      return x86_make_disp(p->machine_EDX,
+      return x86_make_disp(p->machine_EDI,
                            get_offset(p, &p->instance_id));
    }
    if (!index_size && p->nr_buffer_varients == 1) {
-      return p->idx_EBX;
+      return p->idx_ESI;
    }
    else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
       struct x86_reg ptr = p->tmp_EAX;
       struct x86_reg buf_ptr = 
-         x86_make_disp(p->machine_EDX, 
+         x86_make_disp(p->machine_EDI,
                        get_offset(p, &p->buffer_varient[var_idx].ptr));
       
+      x64_rexw(p->func);
       x86_mov(p->func, ptr, buf_ptr);
       return ptr;
    }
@@ -460,11 +1122,11 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
       const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
 
       struct x86_reg buf_stride = 
-         x86_make_disp(p->machine_EDX, 
+         x86_make_disp(p->machine_EDI,
                        get_offset(p, &p->buffer[varient->buffer_index].stride));
 
       struct x86_reg buf_base_ptr = 
-         x86_make_disp(p->machine_EDX, 
+         x86_make_disp(p->machine_EDI,
                        get_offset(p, &p->buffer[varient->buffer_index].base_ptr));
 
 
@@ -484,6 +1146,7 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
          break;
       }
       x86_imul(p->func, ptr, buf_stride);
+      x64_rexw(p->func);
       x86_add(p->func, ptr, buf_base_ptr);
       return ptr;
    }
@@ -495,12 +1158,13 @@ static boolean incr_inputs( struct translate_sse *p,
                             unsigned index_size )
 {
    if (!index_size && p->nr_buffer_varients == 1) {
-      struct x86_reg stride = x86_make_disp(p->machine_EDX,
+      struct x86_reg stride = x86_make_disp(p->machine_EDI,
                                             get_offset(p, &p->buffer[0].stride));
 
       if (p->buffer_varient[0].instance_divisor == 0) {
-         x86_add(p->func, p->idx_EBX, stride);
-         sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
+         x64_rexw(p->func);
+         x86_add(p->func, p->idx_ESI, stride);
+         sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192));
       }
    }
    else if (!index_size) {
@@ -510,21 +1174,23 @@ static boolean incr_inputs( struct translate_sse *p,
        */
       for (i = 0; i < p->nr_buffer_varients; i++) {
          struct translate_buffer_varient *varient = &p->buffer_varient[i];
-         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
+         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
                                                 get_offset(p, &varient->ptr));
-         struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
+         struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
                                                    get_offset(p, &p->buffer[varient->buffer_index].stride));
 
          if (varient->instance_divisor == 0) {
-            x86_mov(p->func, p->tmp_EAX, buf_ptr);
-            x86_add(p->func, p->tmp_EAX, buf_stride);
+            x86_mov(p->func, p->tmp_EAX, buf_stride);
+            x64_rexw(p->func);
+            x86_add(p->func, p->tmp_EAX, buf_ptr);
             if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
+            x64_rexw(p->func);
             x86_mov(p->func, buf_ptr, p->tmp_EAX);
          }
       }
    } 
    else {
-      x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, index_size));
+      x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size));
    }
    
    return TRUE;
@@ -555,29 +1221,45 @@ static boolean build_vertex_emit( struct translate_sse *p,
    unsigned j;
 
    p->tmp_EAX       = x86_make_reg(file_REG32, reg_AX);
-   p->idx_EBX       = x86_make_reg(file_REG32, reg_BX);
-   p->outbuf_ECX    = x86_make_reg(file_REG32, reg_CX);
-   p->machine_EDX   = x86_make_reg(file_REG32, reg_DX);
-   p->count_ESI     = x86_make_reg(file_REG32, reg_SI);
+   p->idx_ESI       = x86_make_reg(file_REG32, reg_SI);
+   p->outbuf_EBX    = x86_make_reg(file_REG32, reg_BX);
+   p->machine_EDI   = x86_make_reg(file_REG32, reg_DI);
+   p->count_EBP     = x86_make_reg(file_REG32, reg_BP);
+   p->tmp2_EDX     = x86_make_reg(file_REG32, reg_DX);
+   p->tmp3_ECX     = x86_make_reg(file_REG32, reg_CX);
 
    p->func = func;
-   p->loaded_inv_255 = FALSE;
-   p->loaded_255 = FALSE;
+   memset(&p->loaded_const, 0, sizeof(p->loaded_const));
    p->loaded_identity = FALSE;
 
    x86_init_func(p->func);
 
-   /* Push a few regs?
-    */
-   x86_push(p->func, p->idx_EBX);
-   x86_push(p->func, p->count_ESI);
+   if(x86_target(p->func) == X86_64_WIN64_ABI)
+   {
+	   /* the ABI guarantees a 16-byte aligned 32-byte "shadow space" above the return address */
+	   sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6));
+	   sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7));
+   }
 
-   /* Load arguments into regs:
-    */
-   x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
-   x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
-   x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
-   x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));
+   x86_push(p->func, p->outbuf_EBX);
+   x86_push(p->func, p->count_EBP);
+
+/* on non-Win64 x86-64, these are already in the right registers */
+   if(x86_target(p->func) != X86_64_STD_ABI)
+   {
+      x86_push(p->func, p->machine_EDI);
+      x86_push(p->func, p->idx_ESI);
+
+      x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1));
+      x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2));
+   }
+
+   x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3));
+
+   if(x86_target(p->func) != X86_32)
+      x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
+   else
+      x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
 
    /* Load instance ID.
     */
@@ -586,14 +1268,14 @@ static boolean build_vertex_emit( struct translate_sse *p,
               p->tmp_EAX,
               x86_fn_arg(p->func, 4));
       x86_mov(p->func,
-              x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
+              x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)),
               p->tmp_EAX);
    }
 
    /* Get vertex count, compare to zero
     */
    x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
-   x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
+   x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
    fixup = x86_jcc_forward(p->func, cc_E);
 
    /* always load, needed or not:
@@ -604,7 +1286,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
     */
    label = x86_get_label(p->func);
    {
-      struct x86_reg elt = !index_size ? p->idx_EBX : x86_deref(p->idx_EBX);
+      struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI);
       int last_varient = -1;
       struct x86_reg vb;
 
@@ -621,15 +1303,16 @@ static boolean build_vertex_emit( struct translate_sse *p,
          
          if (!translate_attr( p, a, 
                               x86_make_disp(vb, a->input_offset), 
-                              x86_make_disp(p->outbuf_ECX, a->output_offset)))
+                              x86_make_disp(p->outbuf_EBX, a->output_offset)))
             return FALSE;
       }
 
       /* Next output vertex:
        */
+      x64_rexw(p->func);
       x86_lea(p->func, 
-              p->outbuf_ECX, 
-              x86_make_disp(p->outbuf_ECX, 
+              p->outbuf_EBX,
+              x86_make_disp(p->outbuf_EBX,
                             p->translate.key.output_stride));
 
       /* Incr index
@@ -639,7 +1322,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
 
    /* decr count, loop if not zero
     */
-   x86_dec(p->func, p->count_ESI);
+   x86_dec(p->func, p->count_EBP);
    x86_jcc(p->func, cc_NZ, label);
 
    /* Exit mmx state?
@@ -654,8 +1337,20 @@ static boolean build_vertex_emit( struct translate_sse *p,
    /* Pop regs and return
     */
    
-   x86_pop(p->func, p->count_ESI);
-   x86_pop(p->func, p->idx_EBX);
+   if(x86_target(p->func) != X86_64_STD_ABI)
+   {
+      x86_pop(p->func, p->idx_ESI);
+      x86_pop(p->func, p->machine_EDI);
+   }
+
+   x86_pop(p->func, p->count_EBP);
+   x86_pop(p->func, p->outbuf_EBX);
+
+   if(x86_target(p->func) == X86_64_WIN64_ABI)
+   {
+	   sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8));
+	   sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24));
+   }
    x86_ret(p->func);
 
    return TRUE;
@@ -704,7 +1399,8 @@ struct translate *translate_sse2_create( const struct translate_key *key )
    struct translate_sse *p = NULL;
    unsigned i;
 
-   if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
+   /* this is misnamed, it actually refers to whether rtasm is enabled or not */
+   if (!rtasm_cpu_has_sse())
       goto fail;
 
    p = CALLOC_STRUCT( translate_sse );
-- 
cgit v1.2.3


From f201217c1d87919572a3b1cf7de94f580f20e5f0 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sun, 15 Aug 2010 07:37:31 +0200
Subject: draw_llvm: fix segfaults on non-SSE2 CPUs where it is disabled (v2)

Changes in v2:
- Change function name

Currently draw_llvm refuses to create itself on non-SSE2 CPUs due to
an alleged LLVM bug.

However, this is implemented improperly, because other parts of draw
still attempt to access draw->llvm, resulting in segfaults.

Instead, put the check in debug_get_option_draw_use_llvm, check that
before calling draw_llvm_create, and then check whether draw->llvm is
non-null everywhere else.
---
 src/gallium/auxiliary/draw/draw_context.c | 37 ++++++++++++++++++++++++++-----
 src/gallium/auxiliary/draw/draw_llvm.c    |  7 ------
 src/gallium/auxiliary/draw/draw_pt.c      |  5 +----
 3 files changed, 32 insertions(+), 17 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 995b675b9a..d118a8db52 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -34,6 +34,7 @@
 #include "pipe/p_context.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_cpu_detect.h"
 #include "draw_context.h"
 #include "draw_vs.h"
 #include "draw_gs.h"
@@ -41,6 +42,25 @@
 #if HAVE_LLVM
 #include "gallivm/lp_bld_init.h"
 #include "draw_llvm.h"
+
+static boolean
+draw_get_option_use_llvm(void)   /* Tells draw_create whether to set up LLVM; evaluated on the first call, cached afterwards. */
+{
+   static boolean first = TRUE;   /* cleared once the option has been evaluated */
+   static boolean value;          /* cached result returned by every call */
+   if (first) {
+      first = FALSE;
+      value = debug_get_bool_option("DRAW_USE_LLVM", TRUE);   /* TRUE is presumably the default when the option is unset -- TODO confirm */
+      /* On PIPE_ARCH_X86 builds the option is forced to FALSE below when the CPU reports no SSE2. */
+#ifdef PIPE_ARCH_X86
+      util_cpu_detect();
+      /* require SSE2 due to LLVM PR6960. */
+      if (!util_cpu_caps.has_sse2)
+         value = FALSE;
+#endif
+   }
+   return value;   /* NOTE(review): no locking around the statics is visible here -- confirm callers are single-threaded */
+}
 #endif
 
 struct draw_context *draw_create( struct pipe_context *pipe )
@@ -50,10 +70,13 @@ struct draw_context *draw_create( struct pipe_context *pipe )
       goto fail;
 
 #if HAVE_LLVM
-   lp_build_init();
-   assert(lp_build_engine);
-   draw->engine = lp_build_engine;
-   draw->llvm = draw_llvm_create(draw);
+   if(draw_get_option_use_llvm())
+   {
+      lp_build_init();
+      assert(lp_build_engine);
+      draw->engine = lp_build_engine;
+      draw->llvm = draw_llvm_create(draw);
+   }
 #endif
 
    if (!draw_init(draw))
@@ -135,7 +158,8 @@ void draw_destroy( struct draw_context *draw )
    draw_vs_destroy( draw );
    draw_gs_destroy( draw );
 #ifdef HAVE_LLVM
-   draw_llvm_destroy( draw->llvm );
+   if(draw->llvm)
+      draw_llvm_destroy( draw->llvm );
 #endif
 
    FREE( draw );
@@ -659,7 +683,8 @@ draw_set_mapped_texture(struct draw_context *draw,
                         const void *data[DRAW_MAX_TEXTURE_LEVELS])
 {
 #ifdef HAVE_LLVM
-   draw_llvm_set_mapped_texture(draw,
+   if(draw->llvm)
+      draw_llvm_set_mapped_texture(draw,
                                 sampler_idx,
                                 width, height, depth, last_level,
                                 row_stride, img_stride, data);
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 8d53601d19..58d3e345e5 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -210,13 +210,6 @@ draw_llvm_create(struct draw_context *draw)
 {
    struct draw_llvm *llvm;
 
-#ifdef PIPE_ARCH_X86
-   util_cpu_detect();
-   /* require SSE2 due to LLVM PR6960. */
-   if (!util_cpu_caps.has_sse2)
-       return NULL;
-#endif
-
    llvm = CALLOC_STRUCT( draw_llvm );
    if (!llvm)
       return NULL;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index b80fc8f552..feacd8258b 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -43,9 +43,6 @@
 
 DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
 DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
-#ifdef HAVE_LLVM
-DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
-#endif
 
 /* Overall we split things into:
  *     - frontend -- prepare fetch_elts, draw_elts - eg vsplit
@@ -140,7 +137,7 @@ boolean draw_pt_init( struct draw_context *draw )
       return FALSE;
 
 #if HAVE_LLVM
-   if (debug_get_option_draw_use_llvm())
+   if (draw->llvm)
       draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw );
 #endif
 
-- 
cgit v1.2.3


From ded92e5dd8eb39bf2a486a6ce95cbef595149582 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 16 Aug 2010 17:18:14 +0100
Subject: translate: Eliminate void pointer arithmetic.

Non-portable.
---
 src/gallium/auxiliary/translate/translate_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 975f23a6f4..ad809db720 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -372,7 +372,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *
 
    for (attr = 0; attr < nr_attrs; attr++) {
       float data[4];
-      char *dst = vert + tg->attrib[attr].output_offset;
+      uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset;
 
       if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
          const uint8_t *src;
-- 
cgit v1.2.3


From b421cb954673e487074c806d6f98722e46abd4f0 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 16 Aug 2010 17:20:54 +0100
Subject: translate: Remove unused temporary register.

Assuming the side-effect of x86_make_reg is also unnecessary.
---
 src/gallium/auxiliary/translate/translate_sse.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index c06197c5d6..035ba531c6 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -495,7 +495,6 @@ static boolean translate_attr_convert( struct translate_sse *p,
          || a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT))
    {
       struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
-      struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
 
       for(i = 0; i < output_desc->nr_channels; ++i)
       {
-- 
cgit v1.2.3


From f437ee85f4a6789d7c3be0d68fd26aa257557b83 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Mon, 16 Aug 2010 13:52:57 -0700
Subject: translate: Move loop variable declaration outside for loop.

Fixes MSVC build.
---
 src/gallium/auxiliary/translate/translate_sse.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 035ba531c6..56c5b36ce2 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -849,6 +849,7 @@ static boolean translate_attr_convert( struct translate_sse *p,
    else if(!memcmp(&output_desc->channel[0], &input_desc->channel[0], sizeof(output_desc->channel[0])))
    {
       struct x86_reg tmp = p->tmp_EAX;
+      unsigned i;
       if(input_desc->channel[0].size == 8 && input_desc->nr_channels == 4 && output_desc->nr_channels == 4
                      && swizzle[0] == UTIL_FORMAT_SWIZZLE_W
                      && swizzle[1] == UTIL_FORMAT_SWIZZLE_Z
@@ -862,7 +863,7 @@ static boolean translate_attr_convert( struct translate_sse *p,
          return TRUE;
       }
 
-      for(unsigned i = 0; i < output_desc->nr_channels; ++i)
+      for(i = 0; i < output_desc->nr_channels; ++i)
       {
          switch(output_desc->channel[0].size)
          {
-- 
cgit v1.2.3


From 15a3b42e135a3a2cb463ec3cff80a55dd8528051 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Mon, 16 Aug 2010 18:52:37 -0700
Subject: util: Remove check_os_katmai_support.

check_os_katmai_support checks that the operating system running on a
SSE-capable processor supports SSE. This is necessary for unpatched
2.2.x and earlier kernels. 2.4.x and later kernels support SSE.

check_os_katmai_support disables SSE capabilities on every 32-bit x86
operating system for which it has no code path. Currently, this
function handles Linux, Windows, and several BSDs. Mac OS, Cygwin, and
Solaris are among the operating systems it does not handle.

Rather than add code for the unhandled operating systems, remove this
function altogether. This will fix SSE detection on all recent 32-bit
x86 operating systems. This completely breaks functionality on unpatched
2.2.x and earlier kernels, although there are likely no Gallium3D users
on such operating systems.
---
 src/gallium/auxiliary/util/u_cpu_detect.c | 120 +-----------------------------
 1 file changed, 1 insertion(+), 119 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 5056351307..b9b9f9257a 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -194,123 +194,8 @@ check_os_altivec_support(void)
 }
 #endif /* PIPE_ARCH_PPC */
 
-/* If we're running on a processor that can do SSE, let's see if we
- * are allowed to or not.  This will catch 2.4.0 or later kernels that
- * haven't been configured for a Pentium III but are running on one,
- * and RedHat patched 2.2 kernels that have broken exception handling
- * support for user space apps that do SSE.
- */
-#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
-static void
-check_os_katmai_support(void)
-{
-#if defined(PIPE_ARCH_X86)
-#if defined(PIPE_OS_FREEBSD)
-   int has_sse=0, ret;
-   int len = sizeof (has_sse);
-
-   ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
-   if (ret || !has_sse)
-      util_cpu_caps.has_sse=0;
-
-#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
-   int has_sse, has_sse2, ret, mib[2];
-   int varlen;
-
-   mib[0] = CTL_MACHDEP;
-   mib[1] = CPU_SSE;
-   varlen = sizeof (has_sse);
-
-   ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
-   if (ret < 0 || !has_sse) {
-      util_cpu_caps.has_sse = 0;
-   } else {
-      util_cpu_caps.has_sse = 1;
-   }
-
-   mib[1] = CPU_SSE2;
-   varlen = sizeof (has_sse2);
-   ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
-   if (ret < 0 || !has_sse2) {
-      util_cpu_caps.has_sse2 = 0;
-   } else {
-      util_cpu_caps.has_sse2 = 1;
-   }
-   util_cpu_caps.has_sse = 0; /* FIXME ?!?!? */
-
-#elif defined(PIPE_OS_WINDOWS)
-   LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
-   if (util_cpu_caps.has_sse) {
-      exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
-#if defined(PIPE_CC_GCC)
-      __asm __volatile ("xorps %xmm0, %xmm0");
-#elif defined(PIPE_CC_MSVC)
-      __asm {
-          xorps xmm0, xmm0        /* executing SSE instruction */
-      }
-#else
-#error Unsupported compiler
-#endif
-      SetUnhandledExceptionFilter(exc_fil);
-   }
-#elif defined(PIPE_OS_LINUX)
-   struct sigaction saved_sigill;
-   struct sigaction saved_sigfpe;
-
-   /* Save the original signal handlers.
-   */
-   sigaction(SIGILL, NULL, &saved_sigill);
-   sigaction(SIGFPE, NULL, &saved_sigfpe);
-
-   signal(SIGILL, (void (*)(int))sigill_handler_sse);
-   signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
-
-   /* Emulate test for OSFXSR in CR4.  The OS will set this bit if it
-    * supports the extended FPU save and restore required for SSE.  If
-    * we execute an SSE instruction on a PIII and get a SIGILL, the OS
-    * doesn't support Streaming SIMD Exceptions, even if the processor
-    * does.
-    */
-   if (util_cpu_caps.has_sse) {
-      __asm __volatile ("xorps %xmm1, %xmm0");
-   }
-
-   /* Emulate test for OSXMMEXCPT in CR4.  The OS will set this bit if
-    * it supports unmasked SIMD FPU exceptions.  If we unmask the
-    * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
-    * doesn't support unmasked SIMD FPU exceptions.  If we get a SIGFPE
-    * as expected, we're okay but we need to clean up after it.
-    *
-    * Are we being too stringent in our requirement that the OS support
-    * unmasked exceptions?  Certain RedHat 2.2 kernels enable SSE by
-    * setting CR4.OSFXSR but don't support unmasked exceptions.  Win98
-    * doesn't even support them.  We at least know the user-space SSE
-    * support is good in kernels that do support unmasked exceptions,
-    * and therefore to be safe I'm going to leave this test in here.
-    */
-   if (util_cpu_caps.has_sse) {
-      /* test_os_katmai_exception_support(); */
-   }
-
-   /* Restore the original signal handlers.
-   */
-   sigaction(SIGILL, &saved_sigill, NULL);
-   sigaction(SIGFPE, &saved_sigfpe, NULL);
-
-#else
-   /* We can't use POSIX signal handling to test the availability of
-    * SSE, so we disable it by default.
-    */
-   util_cpu_caps.has_sse = 0;
-#endif /* __linux__ */
-#endif
-
-#if defined(PIPE_ARCH_X86_64)
-   util_cpu_caps.has_sse = 1;
-#endif
-}
-
 
+#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
 static int has_cpuid(void)
 {
 #if defined(PIPE_ARCH_X86)
@@ -469,9 +354,6 @@ util_cpu_detect(void)
          util_cpu_caps.cacheline = regs2[2] & 0xFF;
       }
 
-      if (util_cpu_caps.has_sse)
-         check_os_katmai_support();
-
       if (!util_cpu_caps.has_sse) {
          util_cpu_caps.has_sse2 = 0;
          util_cpu_caps.has_sse3 = 0;
-- 
cgit v1.2.3


From 37e5f784220248753647801c455eb61e49e16292 Mon Sep 17 00:00:00 2001
From: nobled <nobled@dreamwidth.org>
Date: Mon, 16 Aug 2010 16:46:14 +0000
Subject: gallivm: Fix and re-enable MMX-disabling code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: José Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 6d5410d970..92f9adfc18 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -143,7 +143,6 @@ lp_set_target_options(void)
    llvm::UnsafeFPMath = true;
 #endif
 
-#if 0
    /*
     * LLVM will generate MMX instructions for vectors <= 64 bits, leading to
     * innefficient code, and in 32bit systems, to the corruption of the FPU
@@ -152,10 +151,8 @@ lp_set_target_options(void)
     * See also:
     * - http://llvm.org/bugs/show_bug.cgi?id=3287
     * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
-    *
-    * XXX: Unfortunately this is not working.
     */
-   static boolean first = FALSE;
+   static boolean first = TRUE;
    if (first) {
       static const char* options[] = {
          "prog",
@@ -164,7 +161,6 @@ lp_set_target_options(void)
       llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
       first = FALSE;
    }
-#endif
 }
 
 
-- 
cgit v1.2.3


From 547e88e70de16a3d0451c2aa33f87014adc8bb7c Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 04:14:43 +0200
Subject: translate_sse: don't overwrite source buffer pointer

We were putting the source pointer in a register used as a temporary,
breaking all paths that don't read the data in a single instruction.
---
 src/gallium/auxiliary/translate/translate_sse.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 56c5b36ce2..48e59590bc 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -96,7 +96,7 @@ struct translate_sse {
     */
    struct x86_reg tmp_EAX;
    struct x86_reg tmp2_EDX;
-   struct x86_reg tmp3_ECX;
+   struct x86_reg src_ECX;
    struct x86_reg idx_ESI;     /* either start+i or &elt[i] */
    struct x86_reg machine_EDI;
    struct x86_reg outbuf_EBX;
@@ -1052,7 +1052,7 @@ static boolean init_inputs( struct translate_sse *p,
 
             if (varient->instance_divisor != 1) {
                struct x86_reg tmp_EDX = p->tmp2_EDX;
-               struct x86_reg tmp_ECX = p->tmp3_ECX;
+               struct x86_reg tmp_ECX = p->src_ECX;
 
                /* TODO: Add x86_shr() to rtasm and use it whenever
                 *       instance divisor is power of two.
@@ -1108,7 +1108,7 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
       return p->idx_ESI;
    }
    else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
-      struct x86_reg ptr = p->tmp_EAX;
+      struct x86_reg ptr = p->src_ECX;
       struct x86_reg buf_ptr = 
          x86_make_disp(p->machine_EDI,
                        get_offset(p, &p->buffer_varient[var_idx].ptr));
@@ -1118,7 +1118,7 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
       return ptr;
    }
    else {
-      struct x86_reg ptr = p->tmp_EAX;
+      struct x86_reg ptr = p->src_ECX;
       const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
 
       struct x86_reg buf_stride = 
@@ -1226,7 +1226,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
    p->machine_EDI   = x86_make_reg(file_REG32, reg_DI);
    p->count_EBP     = x86_make_reg(file_REG32, reg_BP);
    p->tmp2_EDX     = x86_make_reg(file_REG32, reg_DX);
-   p->tmp3_ECX     = x86_make_reg(file_REG32, reg_CX);
+   p->src_ECX     = x86_make_reg(file_REG32, reg_CX);
 
    p->func = func;
    memset(&p->loaded_const, 0, sizeof(p->loaded_const));
-- 
cgit v1.2.3


From 8690c6a6b4fb0b48e2ae75cd0f64de86b039081c Mon Sep 17 00:00:00 2001
From: michal <michal@capacitor.(none)>
Date: Wed, 18 Aug 2010 13:16:42 +0200
Subject: gallivm: Use proper index to lookup predicate register array.

Doesn't fix anything, as those indices were both always 0.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 0aa64affac..ca8db9ce01 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -802,7 +802,7 @@ emit_store(
 
    case TGSI_FILE_PREDICATE:
       lp_exec_mask_store(&bld->exec_mask, pred, value,
-                         bld->preds[index][chan_index]);
+                         bld->preds[reg->Register.Index][chan_index]);
       break;
 
    default:
-- 
cgit v1.2.3


From 18f207310b3c7dd7207d56e86a80b2e1d23c5364 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Thu, 19 Aug 2010 10:03:03 +0200
Subject: translate_sse: fix emit_load_sse2

---
 src/gallium/auxiliary/translate/translate_sse.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 48e59590bc..06b8f32fe6 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -190,11 +190,13 @@ static boolean emit_load_sse2( struct translate_sse *p,
    case 2:
       x86_movzx16(p->func, tmp, src);
       sse2_movd(p->func, data, tmp);
+      break;
    case 3:
       x86_movzx8(p->func, tmp, x86_make_disp(src, 2));
       x86_shl_imm(p->func, tmp, 16);
       x86_mov16(p->func, tmp, src);
       sse2_movd(p->func, data, tmp);
+      break;
    case 4:
       sse2_movd(p->func, data, src);
       break;
-- 
cgit v1.2.3


From c54dea66fd86f6000e334c703ea4890179c39c81 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Thu, 19 Aug 2010 10:07:58 +0200
Subject: translate_sse: try to fix Win64

Not sure whether it works now (it is still disabled).
---
 src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 0fe6ebfcb4..75b0f6a68e 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -2105,8 +2105,9 @@ struct x86_reg x86_fn_arg( struct x86_function *p,
       case 4:
          return x86_make_reg(file_REG32, reg_R9);
       default:
+	 /* Win64 allocates stack slots as if it pushed the first 4 arguments too */
          return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
-               p->stack_offset + (arg - 4) * 8);     /* ??? */
+               p->stack_offset + arg * 8);
       }
    case X86_64_STD_ABI:
       switch(arg)
-- 
cgit v1.2.3


From a9b20d45974045397b95b08a7a26e2b0aa8e32cf Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 19 Aug 2010 23:32:04 +0200
Subject: u_blitter: fix a memory leak

---
 src/gallium/auxiliary/util/u_blitter.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index b5b86b7214..49ee7bb31d 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -254,6 +254,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
                                           ctx->dsa_write_depth_keep_stencil);
    pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
    pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
+   pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
 
    pipe->delete_rasterizer_state(pipe, ctx->rs_state);
    pipe->delete_vs_state(pipe, ctx->vs_col);
-- 
cgit v1.2.3


From d3fe699b0b2d46480b699b3fa4c77e41eea8d30d Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Fri, 20 Aug 2010 00:04:30 +0200
Subject: translate_sse: enable on Win64

According to Vinson, enabling it causes no regressions
---
 src/gallium/auxiliary/translate/translate.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index 03a7f050aa..73287b667d 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -38,8 +38,7 @@ struct translate *translate_create( const struct translate_key *key )
 {
    struct translate *translate = NULL;
 
-/* TODO: enable Win64 once it has actually been tested */
-#if defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(_WIN64))
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
    translate = translate_sse2_create( key );
    if (translate)
       return translate;
-- 
cgit v1.2.3


From ae0ef6f69f351cacdc7eaa9b21097a7c1b414e44 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 17:28:08 +0200
Subject: gallium: make all checks for PIPE_TEXTURE_2D check for
 PIPE_TEXTURE_RECT too

Searched for them with:
git grep -E '[!=]=.*PIPE_TEXTURE_2D|PIPE_TEXTURE_2D.*[!=]=|case.*PIPE_TEXTURE_2D'

Behavior hasn't been changed.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 7 +++++--
 src/gallium/auxiliary/util/u_blit.c               | 3 ++-
 src/gallium/auxiliary/util/u_blitter.c            | 3 +++
 src/gallium/auxiliary/util/u_gen_mipmap.c         | 1 +
 src/gallium/auxiliary/util/u_surfaces.h           | 4 ++--
 src/gallium/drivers/i915/i915_resource_texture.c  | 5 ++++-
 src/gallium/drivers/i965/brw_resource_texture.c   | 4 +++-
 src/gallium/drivers/llvmpipe/lp_screen.c          | 1 +
 src/gallium/drivers/llvmpipe/lp_texture.c         | 1 +
 src/gallium/drivers/nv50/nv50_miptree.c           | 3 ++-
 src/gallium/drivers/nv50/nv50_tex.c               | 1 +
 src/gallium/drivers/nvfx/nv30_fragtex.c           | 1 +
 src/gallium/drivers/nvfx/nv40_fragtex.c           | 1 +
 src/gallium/drivers/nvfx/nvfx_miptree.c           | 3 ++-
 src/gallium/drivers/r300/r300_hyperz.c            | 3 ++-
 src/gallium/drivers/r300/r300_texture.c           | 6 ++++--
 src/gallium/drivers/r300/r300_texture_desc.c      | 6 ++++--
 src/gallium/drivers/r600/r600_state.c             | 1 +
 src/gallium/drivers/r600/r600_texture.c           | 3 ++-
 src/gallium/drivers/softpipe/sp_screen.c          | 1 +
 src/gallium/drivers/softpipe/sp_tex_sample.c      | 2 ++
 src/gallium/drivers/svga/svga_resource_texture.c  | 3 ++-
 src/gallium/drivers/svga/svga_tgsi_emit.h         | 1 +
 src/gallium/tests/python/tests/texture_blit.py    | 2 +-
 src/mesa/state_tracker/st_cb_bitmap.c             | 2 +-
 25 files changed, 50 insertions(+), 18 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 806c7d56a8..f6b6162f63 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -176,6 +176,7 @@ texture_dims(enum pipe_texture_target tex)
    case PIPE_TEXTURE_1D:
       return 1;
    case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
    case PIPE_TEXTURE_CUBE:
       return 2;
    case PIPE_TEXTURE_3D:
@@ -1749,7 +1750,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMValueRef unswizzled[4];
    LLVMValueRef stride;
 
-   assert(bld->static_state->target == PIPE_TEXTURE_2D);
+   assert(bld->static_state->target == PIPE_TEXTURE_2D
+         || bld->static_state->target == PIPE_TEXTURE_RECT);
    assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
    assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
    assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
@@ -2077,7 +2079,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    }
    else if (util_format_fits_8unorm(bld.format_desc) &&
             bld.format_desc->nr_channels > 1 &&
-            static_state->target == PIPE_TEXTURE_2D &&
+            (static_state->target == PIPE_TEXTURE_2D ||
+                  static_state->target == PIPE_TEXTURE_RECT) &&
             static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
             static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
             static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 97fa99ec65..30c7a96462 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -347,7 +347,8 @@ util_blit_pixels_writemask(struct blit_state *ctx,
        dst->face == srcsub.face &&
        dst->level == srcsub.level &&
        dst->zslice == srcZ0) ||
-       src_tex->target != PIPE_TEXTURE_2D)
+       (src_tex->target != PIPE_TEXTURE_2D &&
+       src_tex->target != PIPE_TEXTURE_RECT))
    {
       struct pipe_resource texTemp;
       struct pipe_resource *tex;
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 49ee7bb31d..4b69a7fb6a 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -569,6 +569,8 @@ pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target)
       return TGSI_TEXTURE_1D;
    case PIPE_TEXTURE_2D:
       return TGSI_TEXTURE_2D;
+   case PIPE_TEXTURE_RECT:
+      return TGSI_TEXTURE_2D;
    case PIPE_TEXTURE_3D:
       return TGSI_TEXTURE_3D;
    case PIPE_TEXTURE_CUBE:
@@ -807,6 +809,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
       /* Draw the quad with the draw_rectangle callback. */
       case PIPE_TEXTURE_1D:
       case PIPE_TEXTURE_2D:
+      case PIPE_TEXTURE_RECT:
          {
             /* Set texture coordinates. */
             float coord[4];
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index b7fe2d3003..6a931a9581 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -1255,6 +1255,7 @@ fallback_gen_mipmap(struct gen_mipmap_state *ctx,
       make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
       break;
    case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
    case PIPE_TEXTURE_CUBE:
       make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
       break;
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
index af978c7057..46f3ec5d7d 100644
--- a/src/gallium/auxiliary/util/u_surfaces.h
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -22,7 +22,7 @@ struct pipe_surface *util_surfaces_do_get(struct util_surfaces *us, unsigned sur
 static INLINE struct pipe_surface *
 util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
 {
-   if(likely(pt->target == PIPE_TEXTURE_2D && us->u.array))
+   if(likely((pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT) && us->u.array))
    {
       struct pipe_surface *ps = us->u.array[level];
       if(ps)
@@ -52,7 +52,7 @@ void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
 static INLINE void
 util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps)
 {
-   if(likely(ps->texture->target == PIPE_TEXTURE_2D))
+   if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT))
    {
       us->u.array[ps->level] = 0;
       return;
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index 752ddaae7b..c5c6179b16 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -360,6 +360,7 @@ i915_texture_layout(struct i915_texture * tex)
    switch (pt->target) {
    case PIPE_TEXTURE_1D:
    case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
       if (!i9x5_special_layout(tex))
          i915_texture_layout_2d(tex);
       break;
@@ -605,6 +606,7 @@ i945_texture_layout(struct i915_texture * tex)
    switch (pt->target) {
    case PIPE_TEXTURE_1D:
    case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
       if (!i9x5_special_layout(tex))
          i945_texture_layout_2d(tex);
       break;
@@ -829,7 +831,8 @@ i915_texture_from_handle(struct pipe_screen * screen,
    buffer = iws->buffer_from_handle(iws, whandle, &stride);
 
    /* Only supports one type */
-   if (template->target != PIPE_TEXTURE_2D ||
+   if ((template->target != PIPE_TEXTURE_2D &&
+       template->target != PIPE_TEXTURE_RECT) ||
        template->last_level != 0 ||
        template->depth0 != 1) {
       return NULL;
diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c
index ffd0f38672..3860d18a7a 100644
--- a/src/gallium/drivers/i965/brw_resource_texture.c
+++ b/src/gallium/drivers/i965/brw_resource_texture.c
@@ -66,6 +66,7 @@ static GLuint translate_tex_target( unsigned target )
       return BRW_SURFACE_1D;
 
    case PIPE_TEXTURE_2D: 
+   case PIPE_TEXTURE_RECT:
       return BRW_SURFACE_2D;
 
    case PIPE_TEXTURE_3D: 
@@ -498,7 +499,8 @@ brw_texture_from_handle(struct pipe_screen *screen,
    unsigned pitch;
    GLuint format;
 
-   if (template->target != PIPE_TEXTURE_2D ||
+   if ((template->target != PIPE_TEXTURE_2D
+         && template->target != PIPE_TEXTURE_RECT)  ||
        template->last_level != 0 ||
        template->depth0 != 1)
       return NULL;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 167cb2ee2e..6968cda629 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -230,6 +230,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
    assert(target == PIPE_BUFFER ||
           target == PIPE_TEXTURE_1D ||
           target == PIPE_TEXTURE_2D ||
+          target == PIPE_TEXTURE_RECT ||
           target == PIPE_TEXTURE_3D ||
           target == PIPE_TEXTURE_CUBE);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 25112c10a6..ff4773fd7c 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -67,6 +67,7 @@ resource_is_texture(const struct pipe_resource *resource)
       return FALSE;
    case PIPE_TEXTURE_1D:
    case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
    case PIPE_TEXTURE_3D:
    case PIPE_TEXTURE_CUBE:
       return TRUE;
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index b7cd92158f..c0f5cc10dd 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -235,7 +235,8 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen,
 	unsigned stride;
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
-	if (template->target != PIPE_TEXTURE_2D ||
+	if ((template->target != PIPE_TEXTURE_2D &&
+	      template->target != PIPE_TEXTURE_RECT) ||
 	    template->last_level != 0 ||
 	    template->depth0 != 1)
 		return NULL;
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 5ea0c1d726..4db53f7ec2 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -131,6 +131,7 @@ nv50_tex_construct(struct nv50_sampler_view *view)
 		tic[2] |= NV50TIC_0_2_TARGET_1D;
 		break;
 	case PIPE_TEXTURE_2D:
+	case PIPE_TEXTURE_RECT:
 		tic[2] |= NV50TIC_0_2_TARGET_2D;
 		break;
 	case PIPE_TEXTURE_3D:
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
index dec073ac90..0cd70ca104 100644
--- a/src/gallium/drivers/nvfx/nv30_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -116,6 +116,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
 		txf |= NV34TCL_TX_FORMAT_CUBIC;
 		/* fall-through */
 	case PIPE_TEXTURE_2D:
+	case PIPE_TEXTURE_RECT:
 		txf |= NV34TCL_TX_FORMAT_DIMS_2D;
 		break;
 	case PIPE_TEXTURE_3D:
diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
index 0068b1ba54..0d3e90dcb0 100644
--- a/src/gallium/drivers/nvfx/nv40_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
@@ -135,6 +135,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
 		txf |= NV34TCL_TX_FORMAT_CUBIC;
 		/* fall-through */
 	case PIPE_TEXTURE_2D:
+	case PIPE_TEXTURE_RECT:
 		txf |= NV34TCL_TX_FORMAT_DIMS_2D;
 		break;
 	case PIPE_TEXTURE_3D:
diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c
index b5639bb464..1fec1ffa42 100644
--- a/src/gallium/drivers/nvfx/nvfx_miptree.c
+++ b/src/gallium/drivers/nvfx/nvfx_miptree.c
@@ -205,7 +205,8 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen,
 	unsigned stride;
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
-	if (template->target != PIPE_TEXTURE_2D ||
+	if ((template->target != PIPE_TEXTURE_2D &&
+	      template->target != PIPE_TEXTURE_RECT) ||
 	    template->last_level != 0 ||
 	    template->depth0 != 1)
 		return NULL;
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index b2526d6e41..eb5b0c36f8 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -354,7 +354,8 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf
     /* We currently don't handle decompression for 3D textures and cubemaps
      * correctly. */
     if (tex->desc.b.b.target != PIPE_TEXTURE_1D &&
-        tex->desc.b.b.target != PIPE_TEXTURE_2D)
+        tex->desc.b.b.target != PIPE_TEXTURE_2D &&
+        tex->desc.b.b.target != PIPE_TEXTURE_RECT)
         return;
 
     /* Cannot flush zmask of 16-bit zbuffers. */
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index da8eadd3b5..852acdd462 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -754,7 +754,8 @@ struct pipe_resource *r300_texture_create(struct pipe_screen *screen,
     /* Refuse to create a texture with size 0. */
     if (!base->width0 ||
         (!base->height0 && (base->target == PIPE_TEXTURE_2D ||
-                            base->target == PIPE_TEXTURE_CUBE)) ||
+                            base->target == PIPE_TEXTURE_CUBE ||
+                            base->target == PIPE_TEXTURE_RECT)) ||
         (!base->depth0 && base->target == PIPE_TEXTURE_3D)) {
         fprintf(stderr, "r300: texture_create: "
                 "Got invalid texture dimensions: %ix%ix%i\n",
@@ -787,7 +788,8 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
     unsigned stride, size;
 
     /* Support only 2D textures without mipmaps */
-    if (base->target != PIPE_TEXTURE_2D ||
+    if ((base->target != PIPE_TEXTURE_2D &&
+          base->target != PIPE_TEXTURE_RECT) ||
         base->depth0 != 1 ||
         base->last_level != 0) {
         return NULL;
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index 5d690e8c33..2fe5d72188 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -184,7 +184,8 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
 
         /* This is needed for the kernel checker, unfortunately. */
         if ((desc->b.b.target != PIPE_TEXTURE_1D &&
-             desc->b.b.target != PIPE_TEXTURE_2D) ||
+             desc->b.b.target != PIPE_TEXTURE_2D &&
+             desc->b.b.target != PIPE_TEXTURE_RECT) ||
             desc->b.b.last_level != 0) {
             height = util_next_power_of_two(height);
         }
@@ -202,7 +203,8 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
                  * Do so for 3 or more macrotiles in the Y direction. */
                 if (level == 0 && desc->b.b.last_level == 0 &&
                     (desc->b.b.target == PIPE_TEXTURE_1D ||
-                     desc->b.b.target == PIPE_TEXTURE_2D) &&
+                     desc->b.b.target == PIPE_TEXTURE_2D ||
+                     desc->b.b.target == PIPE_TEXTURE_RECT) &&
                     height >= tile_height * 3) {
                     height = align(height, tile_height * 2);
                 }
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index f78d1671ba..7d2b61f9b0 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1199,6 +1199,7 @@ static inline unsigned r600_tex_dim(unsigned dim)
 	case PIPE_TEXTURE_1D:
 		return V_038000_SQ_TEX_DIM_1D;
 	case PIPE_TEXTURE_2D:
+	case PIPE_TEXTURE_RECT:
 		return V_038000_SQ_TEX_DIM_2D;
 	case PIPE_TEXTURE_3D:
 		return V_038000_SQ_TEX_DIM_3D;
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index eabd7f7705..8a6b5f8764 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -170,7 +170,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
 	}
 
 	/* Support only 2D textures without mipmaps */
-	if (templ->target != PIPE_TEXTURE_2D || templ->depth0 != 1 || templ->last_level != 0)
+	if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
+	      templ->depth0 != 1 || templ->last_level != 0)
 		return NULL;
 
 	rtex = CALLOC_STRUCT(r600_resource_texture);
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 93af6ee5b0..73ae2dea56 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -199,6 +199,7 @@ softpipe_is_format_supported( struct pipe_screen *screen,
    assert(target == PIPE_BUFFER ||
           target == PIPE_TEXTURE_1D ||
           target == PIPE_TEXTURE_2D ||
+          target == PIPE_TEXTURE_RECT ||
           target == PIPE_TEXTURE_3D ||
           target == PIPE_TEXTURE_CUBE);
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index cf7ab81405..e654bb77c2 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1785,6 +1785,7 @@ get_lambda_func(const union sp_sampler_key key)
    case PIPE_TEXTURE_1D:
       return compute_lambda_1d;
    case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
    case PIPE_TEXTURE_CUBE:
       return compute_lambda_2d;
    case PIPE_TEXTURE_3D:
@@ -1809,6 +1810,7 @@ get_img_filter(const union sp_sampler_key key,
          return img_filter_1d_linear;
       break;
    case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
       /* Try for fast path:
        */
       if (key.bits.is_pot &&
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index ff83c750aa..26eb03a895 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -583,7 +583,8 @@ svga_texture_from_handle(struct pipe_screen *screen,
    assert(screen);
 
    /* Only supports one type */
-   if (template->target != PIPE_TEXTURE_2D ||
+   if ((template->target != PIPE_TEXTURE_2D &&
+       template->target != PIPE_TEXTURE_RECT) ||
        template->last_level != 0 ||
        template->depth0 != 1) {
       return NULL;
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 48eced2ece..b4e90a957d 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -353,6 +353,7 @@ static INLINE ubyte svga_tgsi_sampler_type( struct svga_shader_emitter *emit,
    case PIPE_TEXTURE_1D:
       return SVGA3DSAMP_2D;
    case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
       return SVGA3DSAMP_2D;
    case PIPE_TEXTURE_3D:
       return SVGA3DSAMP_VOLUME;
diff --git a/src/gallium/tests/python/tests/texture_blit.py b/src/gallium/tests/python/tests/texture_blit.py
index 58706dab93..089d05c623 100755
--- a/src/gallium/tests/python/tests/texture_blit.py
+++ b/src/gallium/tests/python/tests/texture_blit.py
@@ -55,7 +55,7 @@ def tex_coords(texture, face, level, zslice):
         [0.0, 1.0],
     ] 
     
-    if texture.target == PIPE_TEXTURE_2D:
+    if texture.target == PIPE_TEXTURE_2D or texture.target == PIPE_TEXTURE_RECT:
         return [[s, t, 0.0] for s, t in st]
     elif texture.target == PIPE_TEXTURE_3D:
         depth = texture.get_depth(level)
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 0b8ecd27cb..91037ab223 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -761,7 +761,7 @@ st_Bitmap(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
    if (pt) {
       struct pipe_sampler_view *sv = st_create_texture_sampler_view(st->pipe, pt);
 
-      assert(pt->target == PIPE_TEXTURE_2D);
+      assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT);
 
       if (sv) {
          draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
-- 
cgit v1.2.3


From cbe367227959a32ff20e146106162a81d2be02c3 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 17:30:20 +0200
Subject: u_blitter: use TGSI_TEXTURE_RECT

This seems to make sense, although I suspect the semantics of
TGSI_TEXTURE_RECT need to be closely reviewed.
---
 src/gallium/auxiliary/util/u_blitter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 4b69a7fb6a..9c6887b5cd 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -570,7 +570,7 @@ pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target)
    case PIPE_TEXTURE_2D:
       return TGSI_TEXTURE_2D;
    case PIPE_TEXTURE_RECT:
-      return TGSI_TEXTURE_2D;
+      return TGSI_TEXTURE_RECT;
    case PIPE_TEXTURE_3D:
       return TGSI_TEXTURE_3D;
    case PIPE_TEXTURE_CUBE:
-- 
cgit v1.2.3


From 4a9bfb24eb907080b2e3e49215ad9912758d56c6 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 22:57:02 +0200
Subject: u_staging: use PIPE_TEXTURE_RECT

---
 src/gallium/auxiliary/util/u_staging.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
index 607c31f5ee..e2dc696d20 100644
--- a/src/gallium/auxiliary/util/u_staging.c
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -8,7 +8,7 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne
 {
    memset(template, 0, sizeof(struct pipe_resource));
    if(pt->target != PIPE_BUFFER && depth <= 1)
-      template->target = PIPE_TEXTURE_2D;
+      template->target = PIPE_TEXTURE_RECT;
    else
       template->target = pt->target;
    template->format = pt->format;
-- 
cgit v1.2.3


From d4ec85e62423336d3cddc45f26bef6764f435a18 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Fri, 6 Aug 2010 07:39:21 +0200
Subject: auxiliary: support using PIPE_TEXTURE_RECT internally

Currently Gallium internals always use PIPE_TEXTURE_2D and normalized
coordinates to access textures.

However, PIPE_TEXTURE_2D is not always supported for NPOT textures,
and PIPE_TEXTURE_RECT requires unnormalized coordinates.

Hence, this change adds support for both kinds of normalization.
---
 src/gallium/auxiliary/util/u_blit.c    | 67 +++++++++++++++++++++++++---------
 src/gallium/auxiliary/util/u_blitter.c | 48 +++++++++++++++---------
 2 files changed, 81 insertions(+), 34 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 30c7a96462..6fb341eaf2 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -62,6 +62,7 @@ struct blit_state
    struct pipe_viewport_state viewport;
    struct pipe_clip_state clip;
    struct pipe_vertex_element velem[2];
+   enum pipe_texture_target internal_target;
 
    void *vs;
    void *fs[TGSI_WRITEMASK_XYZW + 1];
@@ -110,7 +111,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
    ctx->sampler.min_img_filter = 0; /* set later */
    ctx->sampler.mag_img_filter = 0; /* set later */
-   ctx->sampler.normalized_coords = 1;
 
    /* vertex elements state */
    memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
@@ -145,6 +145,11 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
       ctx->vertices[i][1][3] = 1.0f; /* q */
    }
 
+   if(pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
+      ctx->internal_target = PIPE_TEXTURE_2D;
+   else
+      ctx->internal_target = PIPE_TEXTURE_RECT;
+
    return ctx;
 }
 
@@ -296,6 +301,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    unsigned offset;
    boolean overlap;
    float s0, t0, s1, t1;
+   boolean normalized;
 
    assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
           filter == PIPE_TEX_MIPFILTER_LINEAR);
@@ -335,7 +341,6 @@ util_blit_pixels_writemask(struct blit_state *ctx,
       return;
    }
 
-
    /* Create a temporary texture when src and dest alias or when src
     * is anything other than a 2d texture.
     * XXX should just use appropriate shader to access 1d / 3d slice / cube face,
@@ -373,7 +378,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
 
       /* create temp texture */
       memset(&texTemp, 0, sizeof(texTemp));
-      texTemp.target = PIPE_TEXTURE_2D;
+      texTemp.target = ctx->internal_target;
       texTemp.format = src_tex->format;
       texTemp.last_level = 0;
       texTemp.width0 = srcW;
@@ -393,10 +398,19 @@ util_blit_pixels_writemask(struct blit_state *ctx,
                                  src_tex, srcsub, srcLeft, srcTop, srcZ0, /* src */
                                  srcW, srcH);     /* size */
 
-      s0 = 0.0f; 
-      s1 = 1.0f;
-      t0 = 0.0f;
-      t1 = 1.0f;
+      normalized = tex->target != PIPE_TEXTURE_RECT;
+      if(normalized) {
+         s0 = 0.0f;
+         s1 = 1.0f;
+         t0 = 0.0f;
+         t1 = 1.0f;
+      }
+      else {
+         s0 = 0;
+         s1 = srcW;
+         t0 = 0;
+         t1 = srcH;
+      }
 
       u_sampler_view_default_template(&sv_templ, tex, tex->format);
       sampler_view = pipe->create_sampler_view(pipe, tex, &sv_templ);
@@ -416,17 +430,25 @@ util_blit_pixels_writemask(struct blit_state *ctx,
          return;
       }
 
-      s0 = srcX0 / (float)(u_minify(sampler_view->texture->width0, srcsub.level));
-      s1 = srcX1 / (float)(u_minify(sampler_view->texture->width0, srcsub.level));
-      t0 = srcY0 / (float)(u_minify(sampler_view->texture->height0, srcsub.level));
-      t1 = srcY1 / (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+      s0 = srcX0;
+      s1 = srcX1;
+      t0 = srcY0;
+      t1 = srcY1;
+      normalized = sampler_view->texture->target != PIPE_TEXTURE_RECT;
+      if(normalized)
+      {
+         s0 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
+         s1 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
+         t0 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+         t1 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+      }
    }
 
 
-   assert(screen->is_format_supported(screen, sampler_view->format, PIPE_TEXTURE_2D,
+   assert(screen->is_format_supported(screen, sampler_view->format, ctx->internal_target,
                                       sampler_view->texture->nr_samples,
                                       PIPE_BIND_SAMPLER_VIEW, 0));
-   assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
+   assert(screen->is_format_supported(screen, dst->format, ctx->internal_target,
                                       dst->texture->nr_samples,
                                       PIPE_BIND_RENDER_TARGET, 0));
 
@@ -451,6 +473,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
+   ctx->sampler.normalized_coords = normalized;
    ctx->sampler.min_img_filter = filter;
    ctx->sampler.mag_img_filter = filter;
    /* we've limited this already with the sampler view but you never know... */
@@ -575,6 +598,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
                      int dstX1, int dstY1,
                      float z, uint filter)
 {
+   boolean normalized = src_sampler_view->texture->target != PIPE_TEXTURE_RECT;
    struct pipe_framebuffer_state fb;
    float s0, t0, s1, t1;
    unsigned offset;
@@ -587,10 +611,18 @@ util_blit_pixels_tex(struct blit_state *ctx,
    assert(tex->width0 != 0);
    assert(tex->height0 != 0);
 
-   s0 = srcX0 / (float)tex->width0;
-   s1 = srcX1 / (float)tex->width0;
-   t0 = srcY0 / (float)tex->height0;
-   t1 = srcY1 / (float)tex->height0;
+   s0 = srcX0;
+   s1 = srcX1;
+   t0 = srcY0;
+   t1 = srcY1;
+
+   if(normalized)
+   {
+      s0 /= (float)tex->width0;
+      s1 /= (float)tex->width0;
+      t0 /= (float)tex->height0;
+      t1 /= (float)tex->height0;
+   }
 
    assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
                                                  PIPE_TEXTURE_2D,
@@ -618,6 +650,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
+   ctx->sampler.normalized_coords = normalized;
    ctx->sampler.min_img_filter = filter;
    ctx->sampler.mag_img_filter = filter;
    cso_single_sampler(ctx->cso, 0, &ctx->sampler);
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 9c6887b5cd..8f93dac011 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -92,7 +92,7 @@ struct blitter_context_priv
    void *velem_state;
 
    /* Sampler state for clamping to a miplevel. */
-   void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
+   void *sampler_state[PIPE_MAX_TEXTURE_LEVELS * 2];
 
    /* Rasterizer state. */
    void *rs_state;
@@ -272,7 +272,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
       if (ctx->fs_col[i])
          pipe->delete_fs_state(pipe, ctx->fs_col[i]);
 
-   for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+   for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS * 2; i++)
       if (ctx->sampler_state[i])
          pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
 
@@ -418,16 +418,26 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx,
    }
 }
 
-static void get_normalized_texcoords(struct pipe_resource *src,
+static void get_texcoords(struct pipe_resource *src,
                                      struct pipe_subresource subsrc,
                                      unsigned x1, unsigned y1,
                                      unsigned x2, unsigned y2,
-                                     float out[4])
+                                     boolean normalized, float out[4])
 {
-   out[0] = x1 / (float)u_minify(src->width0,  subsrc.level);
-   out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
-   out[2] = x2 / (float)u_minify(src->width0,  subsrc.level);
-   out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
+   if(normalized)
+   {
+      out[0] = x1 / (float)u_minify(src->width0,  subsrc.level);
+      out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
+      out[2] = x2 / (float)u_minify(src->width0,  subsrc.level);
+      out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
+   }
+   else
+   {
+      out[0] = x1;
+      out[1] = y1;
+      out[2] = x2;
+      out[3] = y2;
+   }
 }
 
 static void set_texcoords_in_vertices(const float coord[4],
@@ -455,7 +465,7 @@ static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
    unsigned i;
    float coord[4];
 
-   get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+   get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
    set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8);
 
    for (i = 0; i < 4; i++) {
@@ -490,7 +500,7 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
    float coord[4];
    float st[4][2];
 
-   get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+   get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
    set_texcoords_in_vertices(coord, &st[0][0], 2);
 
    util_map_texcoords2d_onto_cubemap(subsrc.face,
@@ -524,7 +534,7 @@ static void blitter_draw_quad(struct blitter_context_priv *ctx)
 
 static INLINE
 void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
-                                 int miplevel)
+                                 int miplevel, boolean normalized)
 {
    struct pipe_context *pipe = ctx->base.pipe;
    struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
@@ -532,18 +542,19 @@ void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
    assert(miplevel < PIPE_MAX_TEXTURE_LEVELS);
 
    /* Create the sampler state on-demand. */
-   if (!ctx->sampler_state[miplevel]) {
+   if (!ctx->sampler_state[miplevel * 2 + normalized]) {
       sampler_state->lod_bias = miplevel;
       sampler_state->min_lod = miplevel;
       sampler_state->max_lod = miplevel;
+      sampler_state->normalized_coords = normalized;
 
-      ctx->sampler_state[miplevel] = pipe->create_sampler_state(pipe,
+      ctx->sampler_state[miplevel * 2 + normalized] = pipe->create_sampler_state(pipe,
                                                                 sampler_state);
    }
 
    /* Return void** so that it can be passed to bind_fragment_sampler_states
     * directly. */
-   return &ctx->sampler_state[miplevel];
+   return &ctx->sampler_state[miplevel * 2 + normalized];
 }
 
 static INLINE
@@ -719,6 +730,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
    struct pipe_sampler_view viewTempl, *view;
    unsigned bind;
    boolean is_stencil, is_depth;
+   boolean normalized;
 
    /* Give up if textures are not set. */
    assert(dst && src);
@@ -790,6 +802,8 @@ void util_blitter_copy_region(struct blitter_context *blitter,
       fb_state.zsbuf = 0;
    }
 
+   normalized = src->target != PIPE_TEXTURE_RECT;
+
    /* Initialize sampler view. */
    u_sampler_view_default_template(&viewTempl, src, src->format);
    view = pipe->create_sampler_view(pipe, src, &viewTempl);
@@ -798,7 +812,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
    pipe->bind_vs_state(pipe, ctx->vs_tex);
    pipe->bind_fragment_sampler_states(pipe, 1,
-                                      blitter_get_sampler_state(ctx, subsrc.level));
+                                      blitter_get_sampler_state(ctx, subsrc.level, normalized));
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->set_fragment_sampler_views(pipe, 1, &view);
    pipe->set_framebuffer_state(pipe, &fb_state);
@@ -813,8 +827,8 @@ void util_blitter_copy_region(struct blitter_context *blitter,
          {
             /* Set texture coordinates. */
             float coord[4];
-            get_normalized_texcoords(src, subsrc, srcx, srcy,
-                                     srcx+width, srcy+height, coord);
+            get_texcoords(src, subsrc, srcx, srcy,
+                                     srcx+width, srcy+height, normalized, coord);
 
             /* Draw. */
             blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
-- 
cgit v1.2.3


From 3aaec4750d6fda39b3bb4fc0a159fba1655feede Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Fri, 20 Aug 2010 16:35:34 +0200
Subject: u_staging: improve interface

---
 src/gallium/auxiliary/util/u_staging.c | 10 +++-------
 src/gallium/auxiliary/util/u_staging.h |  8 ++++----
 2 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
index e2dc696d20..363e1c864b 100644
--- a/src/gallium/auxiliary/util/u_staging.c
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -23,20 +23,16 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne
 }
 
 struct util_staging_transfer *
-util_staging_transfer_new(struct pipe_context *pipe,
+util_staging_transfer_init(struct pipe_context *pipe,
            struct pipe_resource *pt,
            struct pipe_subresource sr,
            unsigned usage,
            const struct pipe_box *box,
-           bool direct)
+           bool direct, struct util_staging_transfer *tx)
 {
    struct pipe_screen *pscreen = pipe->screen;
-   struct util_staging_transfer *tx;
-   struct pipe_resource staging_resource_template;
 
-   tx = CALLOC_STRUCT(util_staging_transfer);
-   if (!tx)
-      return NULL;
+   struct pipe_resource staging_resource_template;
 
    pipe_resource_reference(&tx->base.resource, pt);
    tx->base.sr = sr;
diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h
index 602faa2971..3a9da9b401 100644
--- a/src/gallium/auxiliary/util/u_staging.h
+++ b/src/gallium/auxiliary/util/u_staging.h
@@ -21,15 +21,15 @@ struct util_staging_transfer {
 };
 
 /* user must be stride, slice_stride and offset */
-/* pt->usage == PIPE_USAGE_DYNAMIC should be a good value to pass for direct */
-/* staging resource is currently created with PIPE_USAGE_DYNAMIC */
+/* pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING should be a good value to pass for direct */
+/* staging resource is currently created with PIPE_USAGE_STAGING */
 struct util_staging_transfer *
-util_staging_transfer_new(struct pipe_context *pipe,
+util_staging_transfer_init(struct pipe_context *pipe,
            struct pipe_resource *pt,
            struct pipe_subresource sr,
            unsigned usage,
            const struct pipe_box *box,
-           bool direct);
+           bool direct, struct util_staging_transfer *tx);
 
 void
 util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx);
-- 
cgit v1.2.3


From d46f91af68e4930b84dd066687c4d865cb54c9b3 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 00:36:14 +0200
Subject: auxiliary: add functions to describe gallium objects

---
 src/gallium/auxiliary/Makefile                |  1 +
 src/gallium/auxiliary/SConscript              |  1 +
 src/gallium/auxiliary/util/u_debug_describe.c | 43 +++++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_debug_describe.h | 10 +++++++
 4 files changed, 55 insertions(+)
 create mode 100644 src/gallium/auxiliary/util/u_debug_describe.c
 create mode 100644 src/gallium/auxiliary/util/u_debug_describe.h

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index eb2a40cbaa..2dae479275 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -92,6 +92,7 @@ C_SOURCES = \
 	translate/translate.c \
 	translate/translate_cache.c \
 	util/u_debug.c \
+	util/u_debug_describe.c \
 	util/u_debug_symbol.c \
 	util/u_debug_stack.c \
 	util/u_dump_defines.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 30e5d02c9b..43774e3311 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -146,6 +146,7 @@ source = [
     'util/u_caps.c',
     'util/u_cpu_detect.c',
     'util/u_debug.c',
+    'util/u_debug_describe.c',
     'util/u_debug_memory.c',
     'util/u_debug_stack.c',
     'util/u_debug_symbol.c',
diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
new file mode 100644
index 0000000000..5c7808f7ec
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -0,0 +1,43 @@
+#include <pipe/p_state.h>
+#include <util/u_format.h>
+#include <util/u_debug_describe.h>
+#include <util/u_string.h>
+
+void
+debug_describe_reference(char* buf, const struct pipe_reference*ptr)
+{
+   strcpy(buf, "pipe_object");
+}
+
+void
+debug_describe_resource(char* buf, const struct pipe_resource *ptr)
+{
+   if(ptr->target == PIPE_BUFFER)
+      util_sprintf(buf, "pipe_buffer<%u>", util_format_get_stride(ptr->format, ptr->width0));
+   else if(ptr->target == PIPE_TEXTURE_1D)
+      util_sprintf(buf, "pipe_texture1d<%u,%s,%u>", ptr->width0, util_format_short_name(ptr->format), ptr->last_level);
+   else if(ptr->target == PIPE_TEXTURE_2D || ptr->target == PIPE_TEXTURE_RECT) /* same fields printed, only the type name differs */
+      util_sprintf(buf, "pipe_texture%s<%u,%u,%s,%u>", ptr->target == PIPE_TEXTURE_RECT ? "_rect" : "2d", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
+   else if(ptr->target == PIPE_TEXTURE_CUBE)
+      util_sprintf(buf, "pipe_texture_cube<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
+   else if(ptr->target == PIPE_TEXTURE_3D)
+      util_sprintf(buf, "pipe_texture3d<%u,%u,%u,%s,%u>", ptr->width0, ptr->height0, ptr->depth0, util_format_short_name(ptr->format), ptr->last_level);
+   else
+      util_sprintf(buf, "pipe_martian_resource<%u>", ptr->target); /* unknown target: print its raw enum value */
+}
+
+void
+debug_describe_surface(char* buf, const struct pipe_surface *ptr)
+{
+   char res[128];
+   debug_describe_resource(res, ptr->texture);
+   util_sprintf(buf, "pipe_surface<%s,%u,%u,%u>", res, ptr->face, ptr->level, ptr->zslice);
+}
+
+void
+debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr)
+{
+   char res[128];
+   debug_describe_resource(res, ptr->texture);
+   util_sprintf(buf, "pipe_sampler_view<%s,%s>", res, util_format_short_name(ptr->format));
+}
diff --git a/src/gallium/auxiliary/util/u_debug_describe.h b/src/gallium/auxiliary/util/u_debug_describe.h
new file mode 100644
index 0000000000..cab614bdc2
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_describe.h
@@ -0,0 +1,10 @@
+#ifndef U_DEBUG_DESCRIBE_H_
+#define U_DEBUG_DESCRIBE_H_
+
+/* a 256-byte buffer is necessary and sufficient */
+void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
+void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
+void debug_describe_surface(char* buf, const struct pipe_surface *ptr);
+void debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr);
+
+#endif /* U_DEBUG_DESCRIBE_H_ */
-- 
cgit v1.2.3


From 64c4f9c56645768aa3cc4a9a60b266a20acca0c2 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 00:38:19 +0200
Subject: u_debug_symbol: support getting a string without output

---
 src/gallium/auxiliary/util/u_debug_symbol.c | 38 ++++++++++++++++++-----------
 src/gallium/auxiliary/util/u_debug_symbol.h |  3 +++
 2 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c
index 6e250575d6..7147bbc32b 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.c
+++ b/src/gallium/auxiliary/util/u_debug_symbol.c
@@ -33,6 +33,7 @@
  */
 
 #include "pipe/p_compiler.h"
+#include "u_string.h"
 
 #include "u_debug.h"
 #include "u_debug_symbol.h"
@@ -113,8 +114,8 @@ BOOL WINAPI j_SymGetSymFromAddr(HANDLE hProcess, DWORD Address, PDWORD Displacem
 }
 
 
-static INLINE boolean
-debug_symbol_print_imagehlp(const void *addr)
+static INLINE void
+debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size)
 {
    HANDLE hProcess;
    BYTE symbolBuffer[1024];
@@ -131,25 +132,34 @@ debug_symbol_print_imagehlp(const void *addr)
       if(j_SymInitialize(hProcess, NULL, TRUE))
          bSymInitialized = TRUE;
    }
-      
-   if(!j_SymGetSymFromAddr(hProcess, (DWORD)addr, &dwDisplacement, pSymbol))
-      return FALSE;
 
-   debug_printf("\t%s\n", pSymbol->Name);
-
-   return TRUE;
-   
+   if(!j_SymGetSymFromAddr(hProcess, (DWORD)addr, &dwDisplacement, pSymbol))
+      buf[0] = 0;
+   else
+   {
+      strncpy(buf, pSymbol->Name, size);
+      buf[size - 1] = 0;
+   }
 }
 #endif
 
-
 void
-debug_symbol_print(const void *addr)
+debug_symbol_name(const void *addr, char* buf, unsigned size)
 {
 #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
-   if(debug_symbol_print_imagehlp(addr))
+   debug_symbol_name_imagehlp(addr, buf, size);
+   if(buf[0])
       return;
 #endif
-   
-   debug_printf("\t%p\n", addr);
+
+   util_snprintf(buf, size, "%p", addr);
+   buf[size - 1] = 0;
+}
+
+void
+debug_symbol_print(const void *addr)
+{
+   char buf[1024];
+   debug_symbol_name(addr, buf, sizeof(buf));
+   debug_printf("\t%s\n", buf);
 }
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.h b/src/gallium/auxiliary/util/u_debug_symbol.h
index 021586987b..5e283e5ba3 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.h
+++ b/src/gallium/auxiliary/util/u_debug_symbol.h
@@ -42,6 +42,9 @@ extern "C" {
 #endif
 
 
+void
+debug_symbol_name(const void *addr, char* buf, unsigned size);
+
 void
 debug_symbol_print(const void *addr);
 
-- 
cgit v1.2.3


From b3e57fc8685af44dcf35a7f429b7410e63a9a571 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 00:39:49 +0200
Subject: u_debug_symbol: add support for getting symbol names from glibc

---
 src/gallium/auxiliary/util/u_debug_symbol.c | 23 +++++++++++++++++++++++
 src/gallium/tools/addr2line.sh              | 26 ++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100755 src/gallium/tools/addr2line.sh

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c
index 7147bbc32b..ebea517f90 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.c
+++ b/src/gallium/auxiliary/util/u_debug_symbol.c
@@ -143,6 +143,23 @@ debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size)
 }
 #endif
 
+#ifdef __GLIBC__
+#include <execinfo.h>
+
+/* This can only provide dynamic symbols, or binary offsets into a file.
+ *
+ * To fix this, post-process the output with tools/addr2line.sh
+ */
+static INLINE void
+debug_symbol_name_glibc(const void *addr, char* buf, unsigned size)
+{
+   char** syms = backtrace_symbols((void**)&addr, 1); /* returns NULL if the result array cannot be allocated */
+   strncpy(buf, syms ? syms[0] : "", size); /* on failure leave buf empty so debug_symbol_name() falls back to "%p" */
+   buf[size - 1] = 0; /* strncpy does not terminate when the name is truncated */
+   free(syms); /* one allocation holds the array and its strings; free(NULL) is a no-op */
+}
+#endif
+
 void
 debug_symbol_name(const void *addr, char* buf, unsigned size)
 {
@@ -152,6 +169,12 @@ debug_symbol_name(const void *addr, char* buf, unsigned size)
       return;
 #endif
 
+#ifdef __GLIBC__
+   debug_symbol_name_glibc(addr, buf, size);
+   if(buf[0])
+      return;
+#endif
+
    util_snprintf(buf, size, "%p", addr);
    buf[size - 1] = 0;
 }
diff --git a/src/gallium/tools/addr2line.sh b/src/gallium/tools/addr2line.sh
new file mode 100755
index 0000000000..34dec14271
--- /dev/null
+++ b/src/gallium/tools/addr2line.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# This script converts the glibc symbol strings output by Gallium into human-readable function names
+
+lastbin=
+i=-1
+dir="$(mktemp -d)"
+input="$1"
+
+# Gather all unique addresses for each binary
+sed -nre 's|([^ ]*/[^ ]*)\(\+0x([^)]*).*|\1 \2|p' "$input"|sort|uniq|while read bin addr; do
+	if test "$lastbin" != "$bin"; then
+		((++i))
+		lastbin="$bin"
+		echo "$bin" > "$dir/$i.addrs.bin"
+	fi
+	echo "$addr" >> "$dir/$i.addrs"
+done
+
+# Construct a sed script to convert hex address to human readable form, and apply it
+for i in "$dir"/*.addrs; do
+	bin="$(<"$i.bin")"
+	addr2line -p -e "$bin" -a -f < "$i"|sed -nre 's@^0x0*([^:]*): ([^?]*)$@s|'"$bin"'(+0x\1)|\2|g@gp'
+	rm -f "$i" "$i.bin"
+done|sed -f - "$input"
+
+rmdir "$dir"
-- 
cgit v1.2.3


From 40eef4c20cc0b4500a0d8c8538872ed4b473d737 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 16:38:17 +0200
Subject: u_debug_symbol: add symbol name caching

Without this, any form of logging that dumps stack traces continuously
will spend a lot of time resolving symbol names.
---
 src/gallium/auxiliary/util/u_debug_symbol.c | 40 +++++++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_debug_symbol.h |  4 ++-
 2 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c
index ebea517f90..332952af88 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.c
+++ b/src/gallium/auxiliary/util/u_debug_symbol.c
@@ -33,10 +33,12 @@
  */
 
 #include "pipe/p_compiler.h"
+#include "os/os_thread.h"
 #include "u_string.h"
 
 #include "u_debug.h"
 #include "u_debug_symbol.h"
+#include "u_hash_table.h"
 
 #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
    
@@ -186,3 +188,41 @@ debug_symbol_print(const void *addr)
    debug_symbol_name(addr, buf, sizeof(buf));
    debug_printf("\t%s\n", buf);
 }
+
+struct util_hash_table* symbols_hash;
+pipe_mutex symbols_mutex;
+
+static unsigned hash_ptr(void* p)
+{
+   return (unsigned)(uintptr_t)p;
+}
+
+static int compare_ptr(void* a, void* b)
+{
+   if(a == b)
+      return 0;
+   else if(a < b)
+      return -1;
+   else
+      return 1;
+}
+
+const char*
+debug_symbol_name_cached(const void *addr)
+{
+   const char* name;
+   pipe_mutex_lock(symbols_mutex);
+   if(!symbols_hash)
+      symbols_hash = util_hash_table_create(hash_ptr, compare_ptr);
+   name = util_hash_table_get(symbols_hash, (void*)addr);
+   if(!name)
+   {
+      char buf[1024];
+      debug_symbol_name(addr, buf, sizeof(buf));
+      name = strdup(buf);
+
+      util_hash_table_set(symbols_hash, (void*)addr, (void*)name);
+   }
+   pipe_mutex_unlock(symbols_mutex);
+   return name;
+}
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.h b/src/gallium/auxiliary/util/u_debug_symbol.h
index 5e283e5ba3..b247706c2a 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.h
+++ b/src/gallium/auxiliary/util/u_debug_symbol.h
@@ -45,10 +45,12 @@ extern "C" {
 void
 debug_symbol_name(const void *addr, char* buf, unsigned size);
 
+const char*
+debug_symbol_name_cached(const void *addr);
+
 void
 debug_symbol_print(const void *addr);
 
-
 #ifdef	__cplusplus
 }
 #endif
-- 
cgit v1.2.3


From b1fa352db8a69883f97dd579d892291f414a67f5 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Fri, 20 Aug 2010 11:31:24 +0200
Subject: os_stream: add printf facility

---
 src/gallium/auxiliary/Makefile            |  1 +
 src/gallium/auxiliary/SConscript          |  1 +
 src/gallium/auxiliary/os/os_stream.c      | 40 +++++++++++++++++++++++++++++++
 src/gallium/auxiliary/os/os_stream.h      | 25 ++++++++++++++++++-
 src/gallium/auxiliary/os/os_stream_log.c  |  3 ++-
 src/gallium/auxiliary/os/os_stream_null.c |  8 ++++++-
 src/gallium/auxiliary/os/os_stream_stdc.c |  9 +++++++
 src/gallium/auxiliary/os/os_stream_str.c  |  1 +
 8 files changed, 85 insertions(+), 3 deletions(-)
 create mode 100644 src/gallium/auxiliary/os/os_stream.c

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 2dae479275..7bd6a33a19 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -47,6 +47,7 @@ C_SOURCES = \
 	indices/u_indices_gen.c \
 	indices/u_unfilled_gen.c \
 	os/os_misc.c \
+	os/os_stream.c \
 	os/os_stream_log.c \
 	os/os_stream_stdc.c \
 	os/os_stream_str.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 43774e3311..0ece469f36 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -95,6 +95,7 @@ source = [
     'indices/u_indices_gen.c',
     'indices/u_unfilled_gen.c',
     'os/os_misc.c',
+    'os/os_stream.c',
     'os/os_stream_log.c',
     'os/os_stream_stdc.c',
     'os/os_stream_str.c',
diff --git a/src/gallium/auxiliary/os/os_stream.c b/src/gallium/auxiliary/os/os_stream.c
new file mode 100644
index 0000000000..2d4e1852ba
--- /dev/null
+++ b/src/gallium/auxiliary/os/os_stream.c
@@ -0,0 +1,40 @@
+#include "pipe/p_config.h"
+
+#include "os_stream.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+
+int
+os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+   char buf[1024];
+   int retval;
+
+   retval = util_vsnprintf(buf, sizeof(buf), format, ap);
+   if(retval <= 0)
+   {}
+   else if(retval < sizeof(buf))
+      stream->write(stream, buf, retval);
+   else
+   {
+      int alloc = sizeof(buf);
+      char* str = NULL;
+      for(;;)
+      {
+         alloc += alloc;
+         if(str)
+            FREE(str);
+         str = MALLOC(alloc);
+         if(!str)
+            return -1;
+
+         retval = util_vsnprintf(str, alloc, format, ap);
+      } while(retval >= alloc);
+
+      if(retval > 0)
+         stream->write(stream, str, retval);
+      FREE(str);
+   }
+
+   return retval;
+}
diff --git a/src/gallium/auxiliary/os/os_stream.h b/src/gallium/auxiliary/os/os_stream.h
index 693a0621e2..6c6050bb02 100644
--- a/src/gallium/auxiliary/os/os_stream.h
+++ b/src/gallium/auxiliary/os/os_stream.h
@@ -50,6 +50,9 @@ struct os_stream
 
    void
    (*flush)(struct os_stream *stream);
+
+   int
+   (*vprintf)(struct os_stream *stream, const char* format, va_list ap);
 };
 
 
@@ -90,6 +93,27 @@ os_stream_flush(struct os_stream *stream)
    stream->flush(stream);
 }
 
+int
+os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap);
+
+static INLINE int
+os_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+   return stream->vprintf(stream, format, ap);
+}
+
+static INLINE int
+os_stream_printf (struct os_stream* stream, const char *format, ...)
+{
+   int retval;
+   va_list args;
+
+   va_start (args, format);
+   retval = stream->vprintf(stream, format, args);
+   va_end (args);
+
+   return retval;
+}
 
 struct os_stream *
 os_file_stream_create(const char *filename);
@@ -118,5 +142,4 @@ os_str_stream_get_and_close(struct os_stream *stream);
 #define os_file_stream_create(_filename) os_null_stream_create()
 #endif
 
-
 #endif /* _OS_STREAM_H_ */
diff --git a/src/gallium/auxiliary/os/os_stream_log.c b/src/gallium/auxiliary/os/os_stream_log.c
index 7cc2028a22..b01377c346 100644
--- a/src/gallium/auxiliary/os/os_stream_log.c
+++ b/src/gallium/auxiliary/os/os_stream_log.c
@@ -73,7 +73,8 @@ static struct os_stream
 os_log_stream_struct = {
    &os_log_stream_close,
    &os_log_stream_write,
-   &os_log_stream_flush
+   &os_log_stream_flush,
+   &os_default_stream_vprintf,
 };
 
 
diff --git a/src/gallium/auxiliary/os/os_stream_null.c b/src/gallium/auxiliary/os/os_stream_null.c
index 128c4e8f0e..a549a789e6 100644
--- a/src/gallium/auxiliary/os/os_stream_null.c
+++ b/src/gallium/auxiliary/os/os_stream_null.c
@@ -56,12 +56,18 @@ os_null_stream_flush(struct os_stream *stream)
    (void)stream;
 }
 
+static int
+os_null_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+   return 0;
+}
 
 static struct os_stream
 os_null_stream = {
    &os_null_stream_close,
    &os_null_stream_write,
-   &os_null_stream_flush
+   &os_null_stream_flush,
+   &os_null_stream_vprintf
 };
 
 
diff --git a/src/gallium/auxiliary/os/os_stream_stdc.c b/src/gallium/auxiliary/os/os_stream_stdc.c
index 9e7ed71107..37e7d063e2 100644
--- a/src/gallium/auxiliary/os/os_stream_stdc.c
+++ b/src/gallium/auxiliary/os/os_stream_stdc.c
@@ -83,6 +83,14 @@ os_stdc_stream_flush(struct os_stream *_stream)
    fflush(stream->file);
 }
 
+static int
+os_stdc_stream_vprintf (struct os_stream* _stream, const char *format, va_list ap)
+{
+   struct os_stdc_stream *stream = os_stdc_stream(_stream);
+
+   return vfprintf(stream->file, format, ap);
+}
+
 
 struct os_stream *
 os_file_stream_create(const char *filename)
@@ -96,6 +104,7 @@ os_file_stream_create(const char *filename)
    stream->base.close = &os_stdc_stream_close;
    stream->base.write = &os_stdc_stream_write;
    stream->base.flush = &os_stdc_stream_flush;
+   stream->base.vprintf = &os_stdc_stream_vprintf;
 
    stream->file = fopen(filename, "w");
    if(!stream->file)
diff --git a/src/gallium/auxiliary/os/os_stream_str.c b/src/gallium/auxiliary/os/os_stream_str.c
index b5c7270d2a..be9478b2a1 100644
--- a/src/gallium/auxiliary/os/os_stream_str.c
+++ b/src/gallium/auxiliary/os/os_stream_str.c
@@ -118,6 +118,7 @@ os_str_stream_create(size_t size)
    stream->base.close = &os_str_stream_close;
    stream->base.write = &os_str_stream_write;
    stream->base.flush = &os_str_stream_flush;
+   stream->base.vprintf = &os_default_stream_vprintf;
 
    stream->str = os_malloc(size);
    if(!stream->str)
-- 
cgit v1.2.3


From 2ff13fe89e8f0b372512f16bb64d5a703e9bf891 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 00:40:33 +0200
Subject: auxiliary: add reference count debugging code

---
 src/gallium/auxiliary/Makefile              |   1 +
 src/gallium/auxiliary/SConscript            |   1 +
 src/gallium/auxiliary/util/u_debug_refcnt.c | 156 ++++++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_debug_refcnt.h |  29 ++++++
 4 files changed, 187 insertions(+)
 create mode 100644 src/gallium/auxiliary/util/u_debug_refcnt.c
 create mode 100644 src/gallium/auxiliary/util/u_debug_refcnt.h

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 7bd6a33a19..287ee8c29f 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -94,6 +94,7 @@ C_SOURCES = \
 	translate/translate_cache.c \
 	util/u_debug.c \
 	util/u_debug_describe.c \
+	util/u_debug_refcnt.c \
 	util/u_debug_symbol.c \
 	util/u_debug_stack.c \
 	util/u_dump_defines.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 0ece469f36..93bfe9f01f 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -149,6 +149,7 @@ source = [
     'util/u_debug.c',
     'util/u_debug_describe.c',
     'util/u_debug_memory.c',
+    'util/u_debug_refcnt.c',
     'util/u_debug_stack.c',
     'util/u_debug_symbol.c',
     'util/u_dump_defines.c',
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.c b/src/gallium/auxiliary/util/u_debug_refcnt.c
new file mode 100644
index 0000000000..9d6fca56ab
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.c
@@ -0,0 +1,156 @@
+#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
+
+/* see http://www.mozilla.org/performance/refcnt-balancer.html for what do with the output
+ * on Linux, use tools/addr2line.sh to postprocess it before anything else
+ **/
+#include <util/u_debug.h>
+#include <util/u_debug_refcnt.h>
+#include <util/u_debug_stack.h>
+#include <util/u_debug_symbol.h>
+#include <util/u_string.h>
+#include <util/u_hash_table.h>
+#include <os/os_thread.h>
+#include <os/os_stream.h>
+
+int debug_refcnt_state;
+
+struct os_stream* stream;
+
+/* TODO: maybe move this serial machinery to a stand-alone module and expose it? */
+static pipe_mutex serials_mutex;
+static struct util_hash_table* serials_hash;
+static unsigned serials_last;
+
+static unsigned hash_ptr(void* p)
+{
+   return (unsigned)(uintptr_t)p;
+}
+
+static int compare_ptr(void* a, void* b)
+{
+   if(a == b)
+      return 0;
+   else if(a < b)
+      return -1;
+   else
+      return 1;
+}
+
+static boolean debug_serial(void* p, unsigned* pserial)
+{
+   unsigned serial;
+   boolean found = TRUE;
+   pipe_mutex_lock(serials_mutex);
+   if(!serials_hash)
+      serials_hash = util_hash_table_create(hash_ptr, compare_ptr);
+   serial = (unsigned)(uintptr_t)util_hash_table_get(serials_hash, p);
+   if(!serial)
+   {
+      /* time to stop logging... (you'll have a 100 GB logfile at least at this point)
+       * TODO: avoid this
+       */
+      serial = ++serials_last;
+      if(!serial)
+      {
+         debug_error("More than 2^32 objects detected, aborting.\n");
+         os_abort();
+      }
+
+      util_hash_table_set(serials_hash, p, (void*)(uintptr_t)serial);
+      found = FALSE;
+   }
+   pipe_mutex_unlock(serials_mutex);
+   *pserial = serial;
+   return found;
+}
+
+static void debug_serial_delete(void* p)
+{
+   pipe_mutex_lock(serials_mutex);
+   util_hash_table_remove(serials_hash, p);
+   pipe_mutex_unlock(serials_mutex);
+}
+
+#define STACK_LEN 64
+
+static void dump_stack(const char* symbols[STACK_LEN])
+{
+   unsigned i;
+   for(i = 0; i < STACK_LEN; ++i)
+   {
+      if(symbols[i])
+         os_stream_printf(stream, "%s\n", symbols[i]);
+   }
+   os_stream_write(stream, "\n", 1);
+}
+
+void debug_reference_slowpath(const struct pipe_reference* p, void* pget_desc, int change)
+{
+   if(debug_refcnt_state < 0)
+      return;
+
+   if(!debug_refcnt_state)
+   {
+      const char* filename = debug_get_option("GALLIUM_REFCNT_LOG", NULL);
+      if(filename && filename[0])
+         stream = os_file_stream_create(filename);
+
+      if(stream)
+         debug_refcnt_state = 1;
+      else
+         debug_refcnt_state = -1;
+   }
+
+   if(debug_refcnt_state > 0)
+   {
+      struct debug_stack_frame frames[STACK_LEN];
+      const char* symbols[STACK_LEN];
+      char buf[1024];
+
+      void (*get_desc)(char*, const struct pipe_reference*) = pget_desc;
+      unsigned i;
+      unsigned refcnt = p->count;
+      unsigned serial;
+      boolean existing = debug_serial((void*)p, &serial);
+
+      debug_backtrace_capture(frames, 1, STACK_LEN);
+      for(i = 0; i < STACK_LEN; ++i)
+      {
+         if(frames[i].function)
+            symbols[i] = debug_symbol_name_cached(frames[i].function);
+         else
+            symbols[i] = 0;
+      }
+
+      get_desc(buf, p);
+
+      if(!existing)
+      {
+         os_stream_printf(stream, "<%s> %p %u Create\n", buf, p, serial);
+         dump_stack(symbols);
+
+         /* this is there to provide a gradual change even if we don't see the initialization */
+         for(i = 1; i <= refcnt - change; ++i)
+         {
+            os_stream_printf(stream, "<%s> %p %u AddRef %u\n", buf, p, serial, i);
+            dump_stack(symbols);
+         }
+      }
+
+      if(change)
+      {
+         os_stream_printf(stream, "<%s> %p %u %s %u\n", buf, p, serial, change > 0 ? "AddRef" : "Release", refcnt);
+         dump_stack(symbols);
+      }
+
+      if(!refcnt)
+      {
+         debug_serial_delete((void*)p);
+         os_stream_printf(stream, "<%s> %p %u Destroy\n", buf, p, serial);
+         dump_stack(symbols);
+      }
+
+      os_stream_flush(stream);
+   }
+}
+#endif
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.h b/src/gallium/auxiliary/util/u_debug_refcnt.h
new file mode 100644
index 0000000000..e48a2a645c
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.h
@@ -0,0 +1,29 @@
+/*
+ * u_debug_refcnt.h
+ *
+ *  Created on: Aug 17, 2010
+ *      Author: lb
+ */
+
+#ifndef U_DEBUG_REFCNT_H_
+#define U_DEBUG_REFCNT_H_
+
+#include <pipe/p_config.h>
+#include <pipe/p_state.h>
+
+#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
+extern int debug_refcnt_state;
+
+void debug_reference_slowpath(const struct pipe_reference* p, void* get_desc, int change);
+
+static INLINE void debug_reference(const struct pipe_reference* p, void* get_desc, int change)
+{
+	if(debug_refcnt_state >= 0)
+		debug_reference_slowpath(p, get_desc, change);
+}
+#else
+static INLINE void debug_reference(const struct pipe_reference* p, void* get_desc, const char* op)
+{}
+#endif
+
+#endif /* U_DEBUG_REFCNT_H_ */
-- 
cgit v1.2.3


From c806a40277bb5d2dab07908ef79078b0fcc56336 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 18 Aug 2010 00:41:10 +0200
Subject: gallium: hook up reference count debugging code

This commit adds the ability to produce a log file containing all
reference count changes, and object creation/destruction, on Gallium
objects.

The data makes it possible to answer these crucial questions:
1. This app is exhausting all my memory due to a resource leak: where
   is the bug?
2. Which resources is this app using at a given moment? Which parts of
   the code created them?
3. What kinds of resources does this app use?
4. How fast does this app create and destroy resources? Which parts of
   the code create resources fast?

The output is compatible with that produced by the similar facility
in Mozilla Firefox, so Mozilla's tools can be used to analyze the data.

To get the log file:
export GALLIUM_REFCNT_LOG=<file>

To get function names and source lines in the log file:
tools/addr2line.sh <file>

To process the log file, see:
http://www.mozilla.org/performance/refcnt-balancer.html
---
 src/gallium/auxiliary/util/u_inlines.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index 540305c146..90b0903e3f 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -33,6 +33,8 @@
 #include "pipe/p_state.h"
 #include "pipe/p_screen.h"
 #include "util/u_debug.h"
+#include "util/u_debug_describe.h"
+#include "util/u_debug_refcnt.h"
 #include "util/u_atomic.h"
 #include "util/u_box.h"
 #include "util/u_math.h"
@@ -67,7 +69,7 @@ pipe_is_referenced(struct pipe_reference *reference)
  * \return TRUE if the object's refcount hits zero and should be destroyed.
  */
 static INLINE boolean
-pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
+pipe_reference_described(struct pipe_reference *ptr, struct pipe_reference *reference, void* get_desc)
 {
    boolean destroy = FALSE;
 
@@ -76,6 +78,7 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
       if (reference) {
          assert(pipe_is_referenced(reference));
          p_atomic_inc(&reference->count);
+         debug_reference(reference, get_desc, 1);
       }
 
       if (ptr) {
@@ -83,41 +86,45 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
          if (p_atomic_dec_zero(&ptr->count)) {
             destroy = TRUE;
          }
+         debug_reference(ptr, get_desc, -1);
       }
    }
 
    return destroy;
 }
 
+static INLINE boolean
+pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
+{
+   return pipe_reference_described(ptr, reference, debug_describe_reference);
+}
 
 static INLINE void
 pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
 {
    struct pipe_surface *old_surf = *ptr;
 
-   if (pipe_reference(&(*ptr)->reference, &surf->reference))
+   if (pipe_reference_described(&(*ptr)->reference, &surf->reference, debug_describe_surface))
       old_surf->texture->screen->tex_surface_destroy(old_surf);
    *ptr = surf;
 }
 
-
 static INLINE void
 pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
 {
    struct pipe_resource *old_tex = *ptr;
 
-   if (pipe_reference(&(*ptr)->reference, &tex->reference))
+   if (pipe_reference_described(&(*ptr)->reference, &tex->reference, debug_describe_resource))
       old_tex->screen->resource_destroy(old_tex->screen, old_tex);
    *ptr = tex;
 }
 
-
 static INLINE void
 pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view)
 {
    struct pipe_sampler_view *old_view = *ptr;
 
-   if (pipe_reference(&(*ptr)->reference, &view->reference))
+   if (pipe_reference_described(&(*ptr)->reference, &view->reference, debug_describe_sampler_view))
       old_view->context->sampler_view_destroy(old_view->context, old_view);
    *ptr = view;
 }
-- 
cgit v1.2.3


From a43a2f0662c4aa33b50882411181252198819942 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Fri, 20 Aug 2010 18:51:22 +0200
Subject: util: Fix build for C++ compilers.

---
 src/gallium/auxiliary/util/u_debug_describe.h | 8 ++++++++
 src/gallium/auxiliary/util/u_debug_refcnt.h   | 8 ++++++++
 2 files changed, 16 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_debug_describe.h b/src/gallium/auxiliary/util/u_debug_describe.h
index cab614bdc2..8c32f02ee5 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.h
+++ b/src/gallium/auxiliary/util/u_debug_describe.h
@@ -1,10 +1,18 @@
 #ifndef U_DEBUG_DESCRIBE_H_
 #define U_DEBUG_DESCRIBE_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* a 256-byte buffer is necessary and sufficient */
 void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
 void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
 void debug_describe_surface(char* buf, const struct pipe_surface *ptr);
 void debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* U_DEBUG_DESCRIBE_H_ */
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.h b/src/gallium/auxiliary/util/u_debug_refcnt.h
index e48a2a645c..ba40999bf2 100644
--- a/src/gallium/auxiliary/util/u_debug_refcnt.h
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.h
@@ -11,6 +11,10 @@
 #include <pipe/p_config.h>
 #include <pipe/p_state.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
 extern int debug_refcnt_state;
 
@@ -26,4 +30,8 @@ static INLINE void debug_reference(const struct pipe_reference* p, void* get_des
 {}
 #endif
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* U_DEBUG_REFCNT_H_ */
-- 
cgit v1.2.3


From 132b9439e287f1febbb49362671743a5b90e303c Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sat, 21 Aug 2010 00:39:48 +0200
Subject: os_stream: fix bugs in allocation path

---
 src/gallium/auxiliary/os/os_stream.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/os/os_stream.c b/src/gallium/auxiliary/os/os_stream.c
index 2d4e1852ba..7b9c17c5fa 100644
--- a/src/gallium/auxiliary/os/os_stream.c
+++ b/src/gallium/auxiliary/os/os_stream.c
@@ -9,28 +9,20 @@ os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list
 {
    char buf[1024];
    int retval;
-
-   retval = util_vsnprintf(buf, sizeof(buf), format, ap);
+   va_list ap2;
+   va_copy(ap2, ap);
+   retval = util_vsnprintf(buf, sizeof(buf), format, ap2);
+   va_end(ap2);
    if(retval <= 0)
    {}
    else if(retval < sizeof(buf))
       stream->write(stream, buf, retval);
    else
    {
-      int alloc = sizeof(buf);
-      char* str = NULL;
-      for(;;)
-      {
-         alloc += alloc;
-         if(str)
-            FREE(str);
-         str = MALLOC(alloc);
-         if(!str)
-            return -1;
-
-         retval = util_vsnprintf(str, alloc, format, ap);
-      } while(retval >= alloc);
-
+      char* str = MALLOC(retval + 1);
+      if(!str)
+         return -1;
+      retval = util_vsnprintf(str, retval + 1, format, ap);
       if(retval > 0)
          stream->write(stream, str, retval);
       FREE(str);
-- 
cgit v1.2.3


From 7a40d15e6c6b8ebc971be0e926c7027a85db96a0 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 21 Aug 2010 10:07:12 +0100
Subject: util: Remove the x86 exception handlers.

Unused now that check_os_katmai_support was removed.
---
 src/gallium/auxiliary/util/u_cpu_detect.c | 55 -------------------------------
 1 file changed, 55 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index b9b9f9257a..f33d6b3461 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -83,61 +83,6 @@ static int has_cpuid(void);
 #endif
 
 
-#if defined(PIPE_ARCH_X86)
-
-/* The sigill handlers */
-#if defined(PIPE_OS_LINUX) /*&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)*/
-static void
-sigill_handler_sse(int signal, struct sigcontext sc)
-{
-   /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
-    * instructions are 3 bytes long.  We must increment the instruction
-    * pointer manually to avoid repeated execution of the offending
-    * instruction.
-    *
-    * If the SIGILL is caused by a divide-by-zero when unmasked
-    * exceptions aren't supported, the SIMD FPU status and control
-    * word will be restored at the end of the test, so we don't need
-    * to worry about doing it here.  Besides, we may not be able to...
-    */
-   sc.eip += 3;
-
-   util_cpu_caps.has_sse=0;
-}
-
-static void
-sigfpe_handler_sse(int signal, struct sigcontext sc)
-{
-   if (sc.fpstate->magic != 0xffff) {
-      /* Our signal context has the extended FPU state, so reset the
-       * divide-by-zero exception mask and clear the divide-by-zero
-       * exception bit.
-       */
-      sc.fpstate->mxcsr |= 0x00000200;
-      sc.fpstate->mxcsr &= 0xfffffffb;
-   } else {
-      /* If we ever get here, we're completely hosed.
-      */
-   }
-}
-#endif /* PIPE_OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
-
-#if defined(PIPE_OS_WINDOWS)
-static LONG CALLBACK
-win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
-{
-   if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){
-      ep->ContextRecord->Eip +=3;
-      util_cpu_caps.has_sse=0;
-      return EXCEPTION_CONTINUE_EXECUTION;
-   }
-   return EXCEPTION_CONTINUE_SEARCH;
-}
-#endif /* PIPE_OS_WINDOWS */
-
-#endif /* PIPE_ARCH_X86 */
-
-
 #if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
 static jmp_buf  __lv_powerpc_jmpbuf;
 static volatile sig_atomic_t __lv_powerpc_canjump = 0;
-- 
cgit v1.2.3


From 04c2a22175d7c27ee380f986eece2772eddd6fcc Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 21 Aug 2010 10:34:42 +0100
Subject: util: Make the reference debugging code more C++ friendly.

C++ doesn't accept function <-> void* conversions without putting up a
fight.
---
 src/gallium/auxiliary/util/u_debug_describe.h |  5 +++++
 src/gallium/auxiliary/util/u_debug_refcnt.c   |  3 +--
 src/gallium/auxiliary/util/u_debug_refcnt.h   | 19 +++++++++++++------
 src/gallium/auxiliary/util/u_inlines.h        | 16 +++++++++++-----
 4 files changed, 30 insertions(+), 13 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_debug_describe.h b/src/gallium/auxiliary/util/u_debug_describe.h
index 8c32f02ee5..33587ec879 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.h
+++ b/src/gallium/auxiliary/util/u_debug_describe.h
@@ -5,6 +5,11 @@
 extern "C" {
 #endif
 
+struct pipe_reference;
+struct pipe_resource;
+struct pipe_surface;
+struct pipe_sampler_view;
+
 /* a 256-byte buffer is necessary and sufficient */
 void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
 void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.c b/src/gallium/auxiliary/util/u_debug_refcnt.c
index 9d6fca56ab..32e09ae1ae 100644
--- a/src/gallium/auxiliary/util/u_debug_refcnt.c
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.c
@@ -84,7 +84,7 @@ static void dump_stack(const char* symbols[STACK_LEN])
    os_stream_write(stream, "\n", 1);
 }
 
-void debug_reference_slowpath(const struct pipe_reference* p, void* pget_desc, int change)
+void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
 {
    if(debug_refcnt_state < 0)
       return;
@@ -107,7 +107,6 @@ void debug_reference_slowpath(const struct pipe_reference* p, void* pget_desc, i
       const char* symbols[STACK_LEN];
       char buf[1024];
 
-      void (*get_desc)(char*, const struct pipe_reference*) = pget_desc;
       unsigned i;
       unsigned refcnt = p->count;
       unsigned serial;
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.h b/src/gallium/auxiliary/util/u_debug_refcnt.h
index ba40999bf2..4c4a18ecf9 100644
--- a/src/gallium/auxiliary/util/u_debug_refcnt.h
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.h
@@ -15,19 +15,26 @@
 extern "C" {
 #endif
 
+typedef void (*debug_reference_descriptor)(char*, const struct pipe_reference*);
+
 #if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
+
 extern int debug_refcnt_state;
 
-void debug_reference_slowpath(const struct pipe_reference* p, void* get_desc, int change);
+void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change);
 
-static INLINE void debug_reference(const struct pipe_reference* p, void* get_desc, int change)
+static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
 {
-	if(debug_refcnt_state >= 0)
-		debug_reference_slowpath(p, get_desc, change);
+   if (debug_refcnt_state >= 0)
+      debug_reference_slowpath(p, get_desc, change);
 }
+
 #else
-static INLINE void debug_reference(const struct pipe_reference* p, void* get_desc, const char* op)
-{}
+
+static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+}
+
 #endif
 
 #ifdef __cplusplus
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index 90b0903e3f..78473bf35a 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -69,7 +69,9 @@ pipe_is_referenced(struct pipe_reference *reference)
  * \return TRUE if the object's refcount hits zero and should be destroyed.
  */
 static INLINE boolean
-pipe_reference_described(struct pipe_reference *ptr, struct pipe_reference *reference, void* get_desc)
+pipe_reference_described(struct pipe_reference *ptr, 
+                         struct pipe_reference *reference, 
+                         debug_reference_descriptor get_desc)
 {
    boolean destroy = FALSE;
 
@@ -96,7 +98,8 @@ pipe_reference_described(struct pipe_reference *ptr, struct pipe_reference *refe
 static INLINE boolean
 pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
 {
-   return pipe_reference_described(ptr, reference, debug_describe_reference);
+   return pipe_reference_described(ptr, reference, 
+                                   (debug_reference_descriptor)debug_describe_reference);
 }
 
 static INLINE void
@@ -104,7 +107,8 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
 {
    struct pipe_surface *old_surf = *ptr;
 
-   if (pipe_reference_described(&(*ptr)->reference, &surf->reference, debug_describe_surface))
+   if (pipe_reference_described(&(*ptr)->reference, &surf->reference, 
+                                (debug_reference_descriptor)debug_describe_surface))
       old_surf->texture->screen->tex_surface_destroy(old_surf);
    *ptr = surf;
 }
@@ -114,7 +118,8 @@ pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
 {
    struct pipe_resource *old_tex = *ptr;
 
-   if (pipe_reference_described(&(*ptr)->reference, &tex->reference, debug_describe_resource))
+   if (pipe_reference_described(&(*ptr)->reference, &tex->reference, 
+                                (debug_reference_descriptor)debug_describe_resource))
       old_tex->screen->resource_destroy(old_tex->screen, old_tex);
    *ptr = tex;
 }
@@ -124,7 +129,8 @@ pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_
 {
    struct pipe_sampler_view *old_view = *ptr;
 
-   if (pipe_reference_described(&(*ptr)->reference, &view->reference, debug_describe_sampler_view))
+   if (pipe_reference_described(&(*ptr)->reference, &view->reference,
+                                (debug_reference_descriptor)debug_describe_sampler_view))
       old_view->context->sampler_view_destroy(old_view->context, old_view);
    *ptr = view;
 }
-- 
cgit v1.2.3


From 121aa3cfcb106be1ecaae102177bb882bfbce9d9 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 21 Aug 2010 10:38:22 +0100
Subject: util: Match printf format to silence warning.

---
 src/gallium/auxiliary/util/u_debug_describe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
index 5c7808f7ec..343358d0c4 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.c
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -13,7 +13,7 @@ void
 debug_describe_resource(char* buf, const struct pipe_resource *ptr)
 {
    if(ptr->target == PIPE_BUFFER)
-      util_sprintf(buf, "pipe_buffer<%u>", util_format_get_stride(ptr->format, ptr->width0));
+      util_sprintf(buf, "pipe_buffer<%u>", (unsigned)util_format_get_stride(ptr->format, ptr->width0));
    else if(ptr->target == PIPE_TEXTURE_1D)
       util_sprintf(buf, "pipe_texture1d<%u,%s,%u>", ptr->width0, util_format_short_name(ptr->format), ptr->last_level);
    else if(ptr->target == PIPE_TEXTURE_2D)
-- 
cgit v1.2.3


From fa32fde26cbb770c6ffa0a0ead529d511eab1eb1 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sat, 21 Aug 2010 12:37:18 +0200
Subject: auxiliary: add copyright headers

Thanks to Jose Fonseca for pointing out they were missing.
---
 src/gallium/auxiliary/os/os_stream.c          | 26 ++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_debug_describe.c | 26 ++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_debug_describe.h | 26 ++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_debug_refcnt.c   | 26 ++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_debug_refcnt.h   | 29 ++++++++++++++++++++++-----
 src/gallium/auxiliary/util/u_dirty_surfaces.h | 26 ++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_staging.c        | 26 ++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_staging.h        | 26 ++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_surfaces.c       | 26 ++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_surfaces.h       | 26 ++++++++++++++++++++++++
 10 files changed, 258 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/os/os_stream.c b/src/gallium/auxiliary/os/os_stream.c
index 7b9c17c5fa..3c55fc00d9 100644
--- a/src/gallium/auxiliary/os/os_stream.c
+++ b/src/gallium/auxiliary/os/os_stream.c
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #include "pipe/p_config.h"
 
 #include "os_stream.h"
diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
index 343358d0c4..52bbf53be3 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.c
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #include <pipe/p_state.h>
 #include <util/u_format.h>
 #include <util/u_debug_describe.h>
diff --git a/src/gallium/auxiliary/util/u_debug_describe.h b/src/gallium/auxiliary/util/u_debug_describe.h
index 33587ec879..26d1f803bf 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.h
+++ b/src/gallium/auxiliary/util/u_debug_describe.h
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #ifndef U_DEBUG_DESCRIBE_H_
 #define U_DEBUG_DESCRIBE_H_
 
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.c b/src/gallium/auxiliary/util/u_debug_refcnt.c
index 32e09ae1ae..40a26c9c69 100644
--- a/src/gallium/auxiliary/util/u_debug_refcnt.c
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.c
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
 
 /* see http://www.mozilla.org/performance/refcnt-balancer.html for what do with the output
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.h b/src/gallium/auxiliary/util/u_debug_refcnt.h
index 4c4a18ecf9..bea2d1c478 100644
--- a/src/gallium/auxiliary/util/u_debug_refcnt.h
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.h
@@ -1,9 +1,28 @@
-/*
- * u_debug_refcnt.h
+/**************************************************************************
  *
- *  Created on: Aug 17, 2010
- *      Author: lb
- */
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
 
 #ifndef U_DEBUG_REFCNT_H_
 #define U_DEBUG_REFCNT_H_
diff --git a/src/gallium/auxiliary/util/u_dirty_surfaces.h b/src/gallium/auxiliary/util/u_dirty_surfaces.h
index 99f260bf96..c157300502 100644
--- a/src/gallium/auxiliary/util/u_dirty_surfaces.h
+++ b/src/gallium/auxiliary/util/u_dirty_surfaces.h
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #ifndef U_DIRTY_SURFACES_H_
 #define U_DIRTY_SURFACES_H_
 
diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
index 363e1c864b..c5d68f8df8 100644
--- a/src/gallium/auxiliary/util/u_staging.c
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #include "util/u_staging.h"
 #include "pipe/p_context.h"
 #include "util/u_memory.h"
diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h
index 3a9da9b401..1aab78cc88 100644
--- a/src/gallium/auxiliary/util/u_staging.h
+++ b/src/gallium/auxiliary/util/u_staging.h
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 /* Direct3D 10/11 has no concept of transfers. Applications instead
  * create resources with a STAGING or DYNAMIC usage, copy between them
  * and the real resource and use Map to map the STAGING/DYNAMIC resource.
diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c
index 7733ad24d0..404e121995 100644
--- a/src/gallium/auxiliary/util/u_surfaces.c
+++ b/src/gallium/auxiliary/util/u_surfaces.c
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #include "u_surfaces.h"
 #include "util/u_hash_table.h"
 #include "util/u_inlines.h"
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
index 46f3ec5d7d..17d8a5d3a5 100644
--- a/src/gallium/auxiliary/util/u_surfaces.h
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #ifndef U_SURFACES_H_
 #define U_SURFACES_H_
 
-- 
cgit v1.2.3


From 061c2a7cb3924f1983554aa1d53b78238196c412 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sat, 21 Aug 2010 12:45:39 +0200
Subject: u_debug_describe: add PIPE_TEXTURE_RECT

---
 src/gallium/auxiliary/util/u_debug_describe.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
index 52bbf53be3..f21ebd02f5 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.c
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -44,6 +44,8 @@ debug_describe_resource(char* buf, const struct pipe_resource *ptr)
       util_sprintf(buf, "pipe_texture1d<%u,%s,%u>", ptr->width0, util_format_short_name(ptr->format), ptr->last_level);
    else if(ptr->target == PIPE_TEXTURE_2D)
       util_sprintf(buf, "pipe_texture2d<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
+   else if(ptr->target == PIPE_TEXTURE_RECT)
+      util_sprintf(buf, "pipe_texture_rect<%u,%u,%s>", ptr->width0, ptr->height0, util_format_short_name(ptr->format));
    else if(ptr->target == PIPE_TEXTURE_CUBE)
       util_sprintf(buf, "pipe_texture_cube<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
    else if(ptr->target == PIPE_TEXTURE_3D)
-- 
cgit v1.2.3


From bed9dff9d94e33d34340183d7cb633869dcb4f90 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sat, 21 Aug 2010 12:47:18 +0200
Subject: u_debug_describe: use switch instead of if chain

---
 src/gallium/auxiliary/util/u_debug_describe.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
index f21ebd02f5..1c90ff3106 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.c
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -38,20 +38,30 @@ debug_describe_reference(char* buf, const struct pipe_reference*ptr)
 void
 debug_describe_resource(char* buf, const struct pipe_resource *ptr)
 {
-   if(ptr->target == PIPE_BUFFER)
+   switch(ptr->target)
+   {
+   case PIPE_BUFFER:
       util_sprintf(buf, "pipe_buffer<%u>", (unsigned)util_format_get_stride(ptr->format, ptr->width0));
-   else if(ptr->target == PIPE_TEXTURE_1D)
+      break;
+   case PIPE_TEXTURE_1D:
       util_sprintf(buf, "pipe_texture1d<%u,%s,%u>", ptr->width0, util_format_short_name(ptr->format), ptr->last_level);
-   else if(ptr->target == PIPE_TEXTURE_2D)
+      break;
+   case PIPE_TEXTURE_2D:
       util_sprintf(buf, "pipe_texture2d<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
-   else if(ptr->target == PIPE_TEXTURE_RECT)
+      break;
+   case PIPE_TEXTURE_RECT:
       util_sprintf(buf, "pipe_texture_rect<%u,%u,%s>", ptr->width0, ptr->height0, util_format_short_name(ptr->format));
-   else if(ptr->target == PIPE_TEXTURE_CUBE)
+      break;
+   case PIPE_TEXTURE_CUBE:
       util_sprintf(buf, "pipe_texture_cube<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
-   else if(ptr->target == PIPE_TEXTURE_3D)
+      break;
+   case PIPE_TEXTURE_3D:
       util_sprintf(buf, "pipe_texture3d<%u,%u,%u,%s,%u>", ptr->width0, ptr->height0, ptr->depth0, util_format_short_name(ptr->format), ptr->last_level);
-   else
+      break;
+   default:
       util_sprintf(buf, "pipe_martian_resource<%u>", ptr->target);
+      break;
+   }
 }
 
 void
-- 
cgit v1.2.3


From e6ff995d14085caa447c4e8634bf069c8a94f0ec Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Thu, 25 Feb 2010 13:08:35 +0100
Subject: gallium/auxiliary: add semantic linkage utility code

---
 src/gallium/auxiliary/Makefile         |   1 +
 src/gallium/auxiliary/util/u_linkage.c | 145 +++++++++++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_linkage.h |  65 +++++++++++++++
 3 files changed, 211 insertions(+)
 create mode 100644 src/gallium/auxiliary/util/u_linkage.c
 create mode 100644 src/gallium/auxiliary/util/u_linkage.h

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 287ee8c29f..a24b038974 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -123,6 +123,7 @@ C_SOURCES = \
 	util/u_hash.c \
 	util/u_keymap.c \
 	util/u_linear.c \
+	util/u_linkage.c \
 	util/u_network.c \
 	util/u_math.c \
 	util/u_mempool.c \
diff --git a/src/gallium/auxiliary/util/u_linkage.c b/src/gallium/auxiliary/util/u_linkage.c
new file mode 100644
index 0000000000..cefcb4c9f1
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linkage.c
@@ -0,0 +1,145 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "util/u_linkage.h"
+
+/* we must only record the registers that are actually used, not just declared */
+static INLINE boolean
+util_semantic_set_test_and_set(struct util_semantic_set *set, unsigned value)
+{
+   unsigned mask = 1 << (value % (sizeof(long) * 8));
+   unsigned long *p = &set->masks[value / (sizeof(long) * 8)];
+   unsigned long v = *p & mask;
+   *p |= mask;
+   return !!v;
+}
+
+unsigned
+util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file)
+{
+   struct tgsi_shader_info info;
+   struct tgsi_parse_context parse;
+   unsigned count = 0;
+   ubyte *semantic_name;
+   ubyte *semantic_index;
+
+   tgsi_scan_shader(tokens, &info);
+
+   if(file == TGSI_FILE_INPUT)
+   {
+      semantic_name = info.input_semantic_name;
+      semantic_index = info.input_semantic_index;
+   }
+   else if(file == TGSI_FILE_OUTPUT)
+   {
+      semantic_name = info.output_semantic_name;
+      semantic_index = info.output_semantic_index;
+   }
+   else
+      assert(0);
+
+   tgsi_parse_init(&parse, tokens);
+
+   memset(set->masks, 0, sizeof(set->masks));
+   while(!tgsi_parse_end_of_tokens(&parse))
+   {
+      tgsi_parse_token(&parse);
+
+      if(parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION)
+      {
+	 const struct tgsi_full_instruction *finst = &parse.FullToken.FullInstruction;
+	 unsigned i;
+	 for(i = 0; i < finst->Instruction.NumDstRegs; ++i)
+	 {
+	    if(finst->Dst[i].Register.File == file)
+	    {
+	       unsigned idx = finst->Dst[i].Register.Index;
+	       if(semantic_name[idx] == TGSI_SEMANTIC_GENERIC)
+	       {
+		  if(!util_semantic_set_test_and_set(set, semantic_index[idx]))
+		     ++count;
+	       }
+	    }
+	 }
+
+	 for(i = 0; i < finst->Instruction.NumSrcRegs; ++i)
+	 {
+	    if(finst->Src[i].Register.File == file)
+	    {
+	       unsigned idx = finst->Src[i].Register.Index;
+	       if(semantic_name[idx] == TGSI_SEMANTIC_GENERIC)
+	       {
+		  if(!util_semantic_set_test_and_set(set, semantic_index[idx]))
+		     ++count;
+	       }
+	    }
+	 }
+      }
+   }
+   tgsi_parse_free(&parse);
+
+   return count;
+}
+
+#define UTIL_SEMANTIC_SET_FOR_EACH(i, set) for(i = 0; i < 256; ++i) if(set->masks[i / (sizeof(long) * 8)] & (1 << (i % (sizeof(long) * 8))))
+
+void
+util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots)
+{
+   int first = -1;
+   int last = -1;
+   unsigned i;
+
+   memset(layout, 0xff, num_slots);
+
+   UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+   {
+      if(first < 0)
+	 first = i;
+      last = i;
+   }
+
+   if(last < efficient_slots)
+   {
+      UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+         layout[i] = i;
+   }
+   else if((last - first) < efficient_slots)
+   {
+      UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+         layout[i - first] = i;
+   }
+   else
+   {
+      unsigned idx = 0;
+      UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+         layout[idx++] = i;
+   }
+}
diff --git a/src/gallium/auxiliary/util/u_linkage.h b/src/gallium/auxiliary/util/u_linkage.h
new file mode 100644
index 0000000000..c30b56e6e4
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linkage.h
@@ -0,0 +1,65 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_LINKAGE_H_
+#define U_LINKAGE_H_
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+
+struct util_semantic_set
+{
+   unsigned long masks[256 / 8 / sizeof(unsigned long)];
+};
+
+static INLINE bool
+util_semantic_set_contains(struct util_semantic_set *set, unsigned char value)
+{
+   return !!(set->masks[value / (sizeof(long) * 8)] & (1 << (value / (sizeof(long) * 8))));
+}
+
+unsigned util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file);
+
+/* efficient_slots is the number of slots such that hardware performance is
+ * the same whether that many slots are used with holes, or fewer slots are
+ * used with fewer holes.
+ *
+ * num_slots, by contrast, is the size of the layout array and the hardware limit.
+ *
+ * efficient_slots == 0 or efficient_slots == num_slots are typical settings.
+ */
+void util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots);
+
+static INLINE void
+util_semantic_table_from_layout(unsigned char *table, unsigned char *layout, unsigned char first_slot_value, unsigned char num_slots)
+{
+   memset(table, 0xff, sizeof(table));
+
+   for(int i = 0; i < num_slots; ++i)
+      table[layout[i]] = first_slot_value + i;
+}
+
+#endif /* U_LINKAGE_H_ */
-- 
cgit v1.2.3


From 683118ccf2faa5ed0becb4cde6c00516f4f4afdb Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 21 Aug 2010 12:21:59 -0700
Subject: auxiliary: Reorder list of files in Makefile.

This patch reorders the list of files so that the order is more alphabetic.
---
 src/gallium/auxiliary/Makefile | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index a24b038974..eb86d83d2a 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -4,8 +4,8 @@ include $(TOP)/configs/current
 LIBNAME = gallium
 
 C_SOURCES = \
-	cso_cache/cso_context.c \
 	cso_cache/cso_cache.c \
+	cso_cache/cso_context.c \
 	cso_cache/cso_hash.c \
 	draw/draw_context.c \
 	draw/draw_gs.c \
@@ -37,21 +37,21 @@ C_SOURCES = \
 	draw/draw_pt_vsplit.c \
 	draw/draw_vertex.c \
 	draw/draw_vs.c \
-	draw/draw_vs_varient.c \
 	draw/draw_vs_aos.c \
 	draw/draw_vs_aos_io.c \
 	draw/draw_vs_aos_machine.c \
 	draw/draw_vs_exec.c \
 	draw/draw_vs_ppc.c \
 	draw/draw_vs_sse.c \
+	draw/draw_vs_varient.c \
 	indices/u_indices_gen.c \
 	indices/u_unfilled_gen.c \
 	os/os_misc.c \
 	os/os_stream.c \
 	os/os_stream_log.c \
+	os/os_stream_null.c \
 	os/os_stream_stdc.c \
 	os/os_stream_str.c \
-	os/os_stream_null.c \
 	os/os_time.c \
 	pipebuffer/pb_buffer_fenced.c \
 	pipebuffer/pb_buffer_malloc.c \
@@ -64,17 +64,16 @@ C_SOURCES = \
 	pipebuffer/pb_bufmgr_slab.c \
 	pipebuffer/pb_validate.c \
 	rbug/rbug_connection.c \
+	rbug/rbug_context.c \
 	rbug/rbug_core.c \
+	rbug/rbug_demarshal.c \
 	rbug/rbug_texture.c \
-	rbug/rbug_context.c \
 	rbug/rbug_shader.c \
-	rbug/rbug_demarshal.c \
 	rtasm/rtasm_cpu.c \
 	rtasm/rtasm_execmem.c \
-	rtasm/rtasm_x86sse.c \
 	rtasm/rtasm_ppc.c \
 	rtasm/rtasm_ppc_spe.c \
-	tgsi/tgsi_sanity.c \
+	rtasm/rtasm_x86sse.c \
 	tgsi/tgsi_build.c \
 	tgsi/tgsi_dump.c \
 	tgsi/tgsi_exec.c \
@@ -82,21 +81,22 @@ C_SOURCES = \
 	tgsi/tgsi_iterate.c \
 	tgsi/tgsi_parse.c \
 	tgsi/tgsi_ppc.c \
+	tgsi/tgsi_sanity.c \
 	tgsi/tgsi_scan.c \
 	tgsi/tgsi_sse2.c \
 	tgsi/tgsi_text.c \
 	tgsi/tgsi_transform.c \
 	tgsi/tgsi_ureg.c \
 	tgsi/tgsi_util.c \
-	translate/translate_generic.c \
-	translate/translate_sse.c \
 	translate/translate.c \
 	translate/translate_cache.c \
+	translate/translate_generic.c \
+	translate/translate_sse.c \
 	util/u_debug.c \
 	util/u_debug_describe.c \
 	util/u_debug_refcnt.c \
-	util/u_debug_symbol.c \
 	util/u_debug_stack.c \
+	util/u_debug_symbol.c \
 	util/u_dump_defines.c \
 	util/u_dump_state.c \
 	util/u_bitmask.c \
@@ -119,8 +119,8 @@ C_SOURCES = \
 	util/u_gen_mipmap.c \
 	util/u_half.c \
 	util/u_handle_table.c \
-	util/u_hash_table.c \
 	util/u_hash.c \
+	util/u_hash_table.c \
 	util/u_keymap.c \
 	util/u_linear.c \
 	util/u_linkage.c \
@@ -174,10 +174,10 @@ GALLIVM_SOURCES = \
         gallivm/lp_bld_tgsi_soa.c \
         gallivm/lp_bld_type.c \
         draw/draw_llvm.c \
-        draw/draw_vs_llvm.c \
-        draw/draw_pt_fetch_shade_pipeline_llvm.c \
+        draw/draw_llvm_sample.c \
         draw/draw_llvm_translate.c \
-        draw/draw_llvm_sample.c
+        draw/draw_vs_llvm.c \
+        draw/draw_pt_fetch_shade_pipeline_llvm.c
 
 GALLIVM_CPP_SOURCES = \
     gallivm/lp_bld_misc.cpp
-- 
cgit v1.2.3


From 15d558c306da649578ea3539062972ed3a18cd15 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 21 Aug 2010 12:32:17 -0700
Subject: auxiliary: Add missing files to SCons build.

Add u_linear.c and u_linkage.c to SCons build.
Reorder list of files to be more alphabetical.
---
 src/gallium/auxiliary/SConscript | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 93bfe9f01f..6210ada990 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -50,10 +50,11 @@ env.Depends('util/u_format_table.c', [
 ])
 
 source = [
-    'cso_cache/cso_context.c',
     'cso_cache/cso_cache.c',
+    'cso_cache/cso_context.c',
     'cso_cache/cso_hash.c',
     'draw/draw_context.c',
+    'draw/draw_gs.c',
     'draw/draw_pipe.c',
     'draw/draw_pipe_aaline.c',
     'draw/draw_pipe_aapoint.c',
@@ -89,7 +90,6 @@ source = [
     'draw/draw_vs_ppc.c',
     'draw/draw_vs_sse.c',
     'draw/draw_vs_varient.c',
-    'draw/draw_gs.c',
     #'indices/u_indices.c',
     #'indices/u_unfilled_indices.c',
     'indices/u_indices_gen.c',
@@ -97,9 +97,9 @@ source = [
     'os/os_misc.c',
     'os/os_stream.c',
     'os/os_stream_log.c',
+    'os/os_stream_null.c',
     'os/os_stream_stdc.c',
     'os/os_stream_str.c',
-    'os/os_stream_null.c',
     'os/os_time.c',
     'pipebuffer/pb_buffer_fenced.c',
     'pipebuffer/pb_buffer_malloc.c',
@@ -111,35 +111,35 @@ source = [
     'pipebuffer/pb_bufmgr_pool.c',
     'pipebuffer/pb_bufmgr_slab.c',
     'pipebuffer/pb_validate.c',
+    'rbug/rbug_connection.c',
+    'rbug/rbug_context.c',
     'rbug/rbug_core.c',
+    'rbug/rbug_demarshal.c',
     'rbug/rbug_shader.c',
-    'rbug/rbug_context.c',
     'rbug/rbug_texture.c',
-    'rbug/rbug_demarshal.c',
-    'rbug/rbug_connection.c',
     'rtasm/rtasm_cpu.c',
     'rtasm/rtasm_execmem.c',
-    'rtasm/rtasm_x86sse.c',
     'rtasm/rtasm_ppc.c',
     'rtasm/rtasm_ppc_spe.c',
+    'rtasm/rtasm_x86sse.c',
     'tgsi/tgsi_build.c',
     'tgsi/tgsi_dump.c',
     'tgsi/tgsi_exec.c',
     'tgsi/tgsi_info.c',
     'tgsi/tgsi_iterate.c',
     'tgsi/tgsi_parse.c',
+    'tgsi/tgsi_ppc.c',
     'tgsi/tgsi_sanity.c',
     'tgsi/tgsi_scan.c',
-    'tgsi/tgsi_ppc.c',
     'tgsi/tgsi_sse2.c',
     'tgsi/tgsi_text.c',
     'tgsi/tgsi_transform.c',
     'tgsi/tgsi_ureg.c',
     'tgsi/tgsi_util.c',
-    'translate/translate_generic.c',
-    'translate/translate_sse.c',
     'translate/translate.c',
     'translate/translate_cache.c',
+    'translate/translate_generic.c',
+    'translate/translate_sse.c',
     'util/u_bitmask.c',
     'util/u_blit.c',
     'util/u_blitter.c',
@@ -171,6 +171,8 @@ source = [
     'util/u_hash.c',
     'util/u_hash_table.c',
     'util/u_keymap.c',
+    'util/u_linear.c',
+    'util/u_linkage.c',
     'util/u_network.c',
     'util/u_math.c',
     'util/u_mempool.c',
@@ -209,9 +211,9 @@ if env['llvm']:
     'gallivm/lp_bld_format_soa.c',
     'gallivm/lp_bld_format_yuv.c',
     'gallivm/lp_bld_gather.c',
+    'gallivm/lp_bld_init.c',
     'gallivm/lp_bld_intr.c',
     'gallivm/lp_bld_logic.c',
-    'gallivm/lp_bld_init.c',
     'gallivm/lp_bld_misc.cpp',
     'gallivm/lp_bld_pack.c',
     'gallivm/lp_bld_printf.c',
@@ -223,10 +225,10 @@ if env['llvm']:
     'gallivm/lp_bld_tgsi_soa.c',
     'gallivm/lp_bld_type.c',
     'draw/draw_llvm.c',
-    'draw/draw_pt_fetch_shade_pipeline_llvm.c',
+    'draw/draw_llvm_sample.c',
     'draw/draw_llvm_translate.c',
-    'draw/draw_vs_llvm.c',
-    'draw/draw_llvm_sample.c'
+    'draw/draw_pt_fetch_shade_pipeline_llvm.c',
+    'draw/draw_vs_llvm.c'
     ]
 
 gallium = env.ConvenienceLibrary(
-- 
cgit v1.2.3


From 0d96cbe4a5e0a39c17db007f3815868c6a766382 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 21 Aug 2010 21:58:22 +0100
Subject: gallivm: Emit DIVPS instead of RCPPS.

See comments for detailed rationale.

Thanks to Michal Krol and Zack Rusin for detecting and investigating this
in detail.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 36 +++++++++++++++++++----------
 1 file changed, 24 insertions(+), 12 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 7b35dd4bb4..bb30e6e9df 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -59,14 +59,6 @@
 #include "lp_bld_arit.h"
 
 
-/*
- * XXX: Increasing eliminates some artifacts, but adds others, most
- * noticeably corruption in the Earth halo in Google Earth.
- */
-#define RCP_NEWTON_STEPS 0
-
-#define RSQRT_NEWTON_STEPS 0
-
 #define EXP_POLY_DEGREE 3
 
 #define LOG_POLY_DEGREE 5
@@ -1266,6 +1258,11 @@ lp_build_sqrt(struct lp_build_context *bld,
  *
  *   x_{i+1} = x_i * (2 - a * x_i)
  *
+ * XXX: Unfortunately this won't give IEEE-754 conformant results for 0 or
+ * +/-Inf, giving NaN instead.  Certain applications rely on this behavior,
+ * such as Google Earth, which does RCP(RSQRT(0.0)) when drawing the Earth's
+ * halo. It would be necessary to clamp the argument to prevent this.
+ *
  * See also:
  * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
  * - http://softwarecommunity.intel.com/articles/eng/1818.htm
@@ -1306,13 +1303,27 @@ lp_build_rcp(struct lp_build_context *bld,
    if(LLVMIsConstant(a))
       return LLVMConstFDiv(bld->one, a);
 
-   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+   /*
+    * We don't use RCPPS because:
+    * - it only has 10 bits of precision
+    * - it doesn't even get the reciprocal of 1.0 exactly
+    * - doing Newton-Raphson steps yields wrong (NaN) values for 0.0 or Inf
+    * - for recent processors the benefit over DIVPS is marginal and
+    *   case-dependent
+    *
+    * We could still use it on certain processors if benchmarks show that the
+    * RCPPS plus necessary workarounds are still preferable to DIVPS; or for
+    * particular uses that require fewer workarounds.
+    */
+
+   if (FALSE && util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+      const unsigned num_iterations = 0;
       LLVMValueRef res;
       unsigned i;
 
       res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
 
-      for (i = 0; i < RCP_NEWTON_STEPS; ++i) {
+      for (i = 0; i < num_iterations; ++i) {
          res = lp_build_rcp_refine(bld, a, res);
       }
 
@@ -1363,13 +1374,14 @@ lp_build_rsqrt(struct lp_build_context *bld,
 
    assert(type.floating);
 
-   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+   if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+      const unsigned num_iterations = 0;
       LLVMValueRef res;
       unsigned i;
 
       res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
 
-      for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) {
+      for (i = 0; i < num_iterations; ++i) {
          res = lp_build_rsqrt_refine(bld, a, res);
       }
 
-- 
cgit v1.2.3


From 4a6eb492e86f74434504766ec551130ac6306e6d Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 21 Aug 2010 14:36:29 -0700
Subject: util: Move loop variable declaration outside for loop.

Fixes build error with MSVC.
---
 src/gallium/auxiliary/util/u_linkage.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_linkage.h b/src/gallium/auxiliary/util/u_linkage.h
index c30b56e6e4..4720e0ee60 100644
--- a/src/gallium/auxiliary/util/u_linkage.h
+++ b/src/gallium/auxiliary/util/u_linkage.h
@@ -56,9 +56,10 @@ void util_semantic_layout_from_set(unsigned char *layout, const struct util_sema
 static INLINE void
 util_semantic_table_from_layout(unsigned char *table, unsigned char *layout, unsigned char first_slot_value, unsigned char num_slots)
 {
+   int i;
    memset(table, 0xff, sizeof(table));
 
-   for(int i = 0; i < num_slots; ++i)
+   for(i = 0; i < num_slots; ++i)
       table[layout[i]] = first_slot_value + i;
 }
 
-- 
cgit v1.2.3


From be99100ee78d7b97f616a375e47eb7d436fa4416 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 21 Aug 2010 15:48:25 -0700
Subject: util: Silence uninitialized variable warnings.

---
 src/gallium/auxiliary/util/u_linkage.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_linkage.c b/src/gallium/auxiliary/util/u_linkage.c
index cefcb4c9f1..2f6f41ba84 100644
--- a/src/gallium/auxiliary/util/u_linkage.c
+++ b/src/gallium/auxiliary/util/u_linkage.c
@@ -63,7 +63,11 @@ util_semantic_set_from_program_file(struct util_semantic_set *set, const struct
       semantic_index = info.output_semantic_index;
    }
    else
+   {
       assert(0);
+      semantic_name = NULL;
+      semantic_index = NULL;
+   }
 
    tgsi_parse_init(&parse, tokens);
 
-- 
cgit v1.2.3


From ae34a6393e6519dc32e53fa8407155e8679fc257 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 22 Aug 2010 02:26:09 +0100
Subject: draw: Don't assert if indices point outside vertex buffer.

This is valid input, and asserting here causes the test suites that
verify this to crash.

Also, the assert was wrongly accepting the case

  max_index == vert_info->count

which, IIUC, is the first vertex outside the buffer. Assuming the
vert_info->count is precise (which often is not the case).
---
 src/gallium/auxiliary/draw/draw_pipe.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index b75262a357..6206197dae 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -238,7 +238,7 @@ void draw_pipeline_run( struct draw_context *draw,
       const unsigned count = prim_info->primitive_lengths[i];
 
 #if DEBUG
-      /* make sure none of the element indexes go outside the vertex buffer */
+      /* Warn if one of the element indexes go outside the vertex buffer */
       {
          unsigned max_index = 0x0, i;
          /* find the largest element index */
@@ -247,7 +247,12 @@ void draw_pipeline_run( struct draw_context *draw,
             if (index > max_index)
                max_index = index;
          }
-         assert(max_index <= vert_info->count);
+         if (max_index >= vert_info->count) {
+            debug_printf("%s: max_index (%u) outside vertex buffer (%u)\n",
+                         __FUNCTION__,
+                         max_index,
+                         vert_info->count);
+         }
       }
 #endif
 
-- 
cgit v1.2.3


From 2a7493ada4503db855ed35031d48fcf2a31eded3 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 21 Aug 2010 23:24:28 -0700
Subject: translate_sse: Silence uninitialized variable warnings.

Initialize variables on error paths.
---
 src/gallium/auxiliary/translate/translate_sse.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 06b8f32fe6..5188e49cd5 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -553,6 +553,13 @@ static boolean translate_attr_convert( struct translate_sse *p,
                case 32:
                   factor = get_inv_2147483647(p);
                   break;
+               default:
+                  assert(0);
+                  factor.disp = 0;
+                  factor.file = 0;
+                  factor.idx = 0;
+                  factor.mod = 0;
+                  break;
                }
                sse_mulps(p->func, dataXMM, factor);
             }
@@ -596,6 +603,13 @@ static boolean translate_attr_convert( struct translate_sse *p,
                case 32:
                   factor = get_inv_2147483647(p);
                   break;
+               default:
+                  assert(0);
+                  factor.disp = 0;
+                  factor.file = 0;
+                  factor.idx = 0;
+                  factor.mod = 0;
+                  break;
                }
                sse_mulps(p->func, dataXMM, factor);
             }
-- 
cgit v1.2.3


From 0f3b3751b8643352dcc242567b3696bd1505df1d Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 21 Aug 2010 23:28:52 -0700
Subject: util: Define dump_cpu only for DEBUG builds.

dump_cpu is used only when DEBUG is defined.

Fixes the following GCC warning on builds without DEBUG defined.
util/u_cpu_detect.c:76: warning: 'debug_get_option_dump_cpu' defined but not used
---
 src/gallium/auxiliary/util/u_cpu_detect.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index f33d6b3461..5d0b16d28e 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -73,7 +73,9 @@
 #endif
 
 
+#if DEBUG
 DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
+#endif
 
 
 struct util_cpu_caps util_cpu_caps;
-- 
cgit v1.2.3


From 3bdbccef2adfc699a737d7d25911004938bbbfcc Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 21 Aug 2010 23:36:30 -0700
Subject: util: Use #ifdef instead of #if.

This is a typo fix of earlier commit 0f3b3751b8643352dcc242567b3696bd1505df1d.
---
 src/gallium/auxiliary/util/u_cpu_detect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 5d0b16d28e..32519b148b 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -73,7 +73,7 @@
 #endif
 
 
-#if DEBUG
+#ifdef DEBUG
 DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
 #endif
 
-- 
cgit v1.2.3


From a1de6f48c3fa79bbc8f2514da19b3e01138e7093 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sat, 21 Aug 2010 22:51:38 +0100
Subject: draw: reduce the size of the llvm variant key

---
 src/gallium/auxiliary/draw/draw_llvm.c             | 67 ++++++++++++++--------
 src/gallium/auxiliary/draw/draw_llvm.h             | 54 ++++++++++++++---
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c       | 12 ++--
 src/gallium/auxiliary/draw/draw_vs_llvm.c          |  5 ++
 4 files changed, 100 insertions(+), 38 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 58d3e345e5..8759c38cab 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -285,15 +285,23 @@ draw_llvm_destroy(struct draw_llvm *llvm)
 }
 
 struct draw_llvm_variant *
-draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs)
+draw_llvm_create_variant(struct draw_llvm *llvm,
+			 unsigned num_inputs,
+			 const struct draw_llvm_variant_key *key)
 {
-   struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
+   struct draw_llvm_variant *variant;
    struct llvm_vertex_shader *shader =
       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
 
+   variant = MALLOC(sizeof *variant +
+		    shader->variant_key_size -
+		    sizeof variant->key);
+   if (variant == NULL)
+      return NULL;
+
    variant->llvm = llvm;
 
-   draw_llvm_make_variant_key(llvm, &variant->key);
+   memcpy(&variant->key, key, shader->variant_key_size);
 
    llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
 
@@ -731,8 +739,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
 
    /* code generated texture sampling */
-   sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
-                                          context_ptr);
+   sampler = draw_llvm_sampler_soa_create(
+      draw_llvm_variant_key_samplers(&variant->key),
+      context_ptr);
 
 #if DEBUG_STORE
    lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
@@ -894,8 +903,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
    step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
 
    /* code generated texture sampling */
-   sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
-                                          context_ptr);
+   sampler = draw_llvm_sampler_soa_create(
+      draw_llvm_variant_key_samplers(&variant->key),
+      context_ptr);
 
    fetch_max = LLVMBuildSub(builder, fetch_count,
                             LLVMConstInt(LLVMInt32Type(), 1, 0),
@@ -995,35 +1005,42 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
    lp_func_delete_body(variant->function_elts);
 }
 
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
-                           struct draw_llvm_variant_key *key)
+
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
 {
    unsigned i;
+   struct draw_llvm_variant_key *key;
+   struct lp_sampler_static_state *sampler;
 
-   memset(key, 0, sizeof(struct draw_llvm_variant_key));
+   key = (struct draw_llvm_variant_key *)store;
 
+   /* Presumably all variants of the shader should have the same
+    * number of vertex elements - ie the number of shader inputs.
+    */
    key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
 
+   /* All variants of this shader will have the same value for
+    * nr_samplers.  Not yet trying to compact away holes in the
+    * sampler array.
+    */
+   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+   sampler = draw_llvm_variant_key_samplers(key);
+
    memcpy(key->vertex_element,
           llvm->draw->pt.vertex_element,
           sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
+   
+   memset(sampler, 0, key->nr_samplers * sizeof *sampler);
 
-   memcpy(&key->vs,
-          &llvm->draw->vs.vertex_shader->state,
-          sizeof(struct pipe_shader_state));
-
-   /* if the driver implemented the sampling hooks then
-    * setup our sampling state */
-   if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) {
-      for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) {
-         struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader;
-         if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
-            lp_sampler_static_state(&key->sampler[i],
-                                    llvm->draw->sampler_views[i],
-                                    llvm->draw->samplers[i]);
-      }
+   for (i = 0 ; i < key->nr_samplers; i++) {
+      lp_sampler_static_state(&sampler[i],
+			      llvm->draw->sampler_views[i],
+			      llvm->draw->samplers[i]);
    }
+
+   return key;
 }
 
 void
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 4addb47d2d..6196b2f983 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -151,12 +151,43 @@ typedef void
 
 struct draw_llvm_variant_key
 {
-   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-   unsigned                   nr_vertex_elements;
-   struct pipe_shader_state   vs;
-   struct lp_sampler_static_state sampler[PIPE_MAX_VERTEX_SAMPLERS];
+   unsigned nr_vertex_elements:16;
+   unsigned nr_samplers:16;
+
+   /* Variable number of vertex elements:
+    */
+   struct pipe_vertex_element vertex_element[1];
+
+   /* Followed by variable number of samplers:
+    */
+/*   struct lp_sampler_static_state sampler; */
 };
 
+#define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \
+   (sizeof(struct draw_llvm_variant_key) +	\
+    PIPE_MAX_VERTEX_SAMPLERS * sizeof(struct lp_sampler_static_state) +	\
+    (PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element))
+
+
+static INLINE size_t
+draw_llvm_variant_key_size(unsigned nr_vertex_elements,
+			   unsigned nr_samplers)
+{
+   return (sizeof(struct draw_llvm_variant_key) +
+	   nr_samplers * sizeof(struct lp_sampler_static_state) +
+	   (nr_vertex_elements - 1) * sizeof(struct pipe_vertex_element));
+}
+
+
+static INLINE struct lp_sampler_static_state *
+draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
+{
+   return (struct lp_sampler_static_state *)
+      &key->vertex_element[key->nr_vertex_elements];
+}
+
+
+
 struct draw_llvm_variant_list_item
 {
    struct draw_llvm_variant *base;
@@ -165,7 +196,6 @@ struct draw_llvm_variant_list_item
 
 struct draw_llvm_variant
 {
-   struct draw_llvm_variant_key key;
    LLVMValueRef function;
    LLVMValueRef function_elts;
    draw_jit_vert_func jit_func;
@@ -176,11 +206,16 @@ struct draw_llvm_variant
    struct draw_llvm *llvm;
    struct draw_llvm_variant_list_item list_item_global;
    struct draw_llvm_variant_list_item list_item_local;
+
+   /* key is variable-sized, must be last */
+   struct draw_llvm_variant_key key;
+   /* key is variable-sized, must be last */
 };
 
 struct llvm_vertex_shader {
    struct draw_vertex_shader base;
 
+   unsigned variant_key_size;
    struct draw_llvm_variant_list_item variants;
    unsigned variants_created;
    unsigned variants_cached;
@@ -220,14 +255,15 @@ void
 draw_llvm_destroy(struct draw_llvm *llvm);
 
 struct draw_llvm_variant *
-draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs);
+draw_llvm_create_variant(struct draw_llvm *llvm,
+			 unsigned num_vertex_header_attribs,
+			 const struct draw_llvm_variant_key *key);
 
 void
 draw_llvm_destroy_variant(struct draw_llvm_variant *variant);
 
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
-                           struct draw_llvm_variant_key *key);
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
 
 LLVMValueRef
 draw_llvm_translate_from(LLVMBuilderRef builder,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 78b1bf988c..cc0b4e5232 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -66,7 +66,8 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
    struct draw_context *draw = fpme->draw;
    struct llvm_vertex_shader *shader =
       llvm_vertex_shader(draw->vs.vertex_shader);
-   struct draw_llvm_variant_key key;
+   char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
+   struct draw_llvm_variant_key *key;
    struct draw_llvm_variant *variant = NULL;
    struct draw_llvm_variant_list_item *li;
    unsigned i;
@@ -125,11 +126,14 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
       *max_vertices = 4096;
    }
 
-   draw_llvm_make_variant_key(fpme->llvm, &key);
+   /* return even number */
+   *max_vertices = *max_vertices & ~1;
+   
+   key = draw_llvm_make_variant_key(fpme->llvm, store);
 
    li = first_elem(&shader->variants);
    while(!at_end(&shader->variants, li)) {
-      if(memcmp(&li->base->key, &key, sizeof key) == 0) {
+      if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
          variant = li->base;
          break;
       }
@@ -152,7 +156,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
          }
       }
 
-      variant = draw_llvm_create_variant(fpme->llvm, nr);
+      variant = draw_llvm_create_variant(fpme->llvm, nr, key);
 
       if (variant) {
          insert_at_head(&shader->variants, &variant->list_item_local);
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index d13ad24fff..0014863454 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -109,6 +109,11 @@ draw_create_vs_llvm(struct draw_context *draw,
 
    tgsi_scan_shader(state->tokens, &vs->base.info);
 
+   vs->variant_key_size = 
+      draw_llvm_variant_key_size(
+	 vs->base.info.file_max[TGSI_FILE_INPUT]+1,
+	 vs->base.info.file_max[TGSI_FILE_SAMPLER]+1);
+
    vs->base.draw = draw;
    vs->base.prepare = vs_llvm_prepare;
    vs->base.run_linear = vs_llvm_run_linear;
-- 
cgit v1.2.3


From 4f3fedcef735cda34aa04416950b5febb64435fc Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sun, 22 Aug 2010 17:11:22 +0100
Subject: translate_sse: refactor constant management

---
 src/gallium/auxiliary/translate/translate_sse.c | 157 ++++++++++++------------
 1 file changed, 76 insertions(+), 81 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 5188e49cd5..3fcd120ed1 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -62,6 +62,30 @@ struct translate_buffer_varient {
 
 #define ELEMENT_BUFFER_INSTANCE_ID  1001
 
+#define NUM_CONSTS 7
+
+enum
+{
+   CONST_IDENTITY,
+   CONST_INV_127,
+   CONST_INV_255,
+   CONST_INV_32767,
+   CONST_INV_65535,
+   CONST_INV_2147483647,
+   CONST_255
+};
+
+#define C(v) {(float)(v), (float)(v), (float)(v), (float)(v)}
+static float consts[NUM_CONSTS][4] = {
+      {0, 0, 0, 1},
+      C(1.0 / 127.0),
+      C(1.0 / 255.0),
+      C(1.0 / 32767.0),
+      C(1.0 / 65535.0),
+      C(1.0 / 2147483647.0),
+      C(255.0)
+};
+#undef C
 
 struct translate_sse {
    struct translate translate;
@@ -72,11 +96,9 @@ struct translate_sse {
    struct x86_function elt8_func;
    struct x86_function *func;
 
-   boolean loaded_identity;
-   boolean loaded_const[5];
-
-   float identity[4];
-   float const_value[5][4];
+   PIPE_ALIGN_VAR(16) float consts[NUM_CONSTS][4];
+   int8_t reg_to_const[16];
+   int8_t const_to_reg[NUM_CONSTS];
 
    struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
    unsigned nr_buffers;
@@ -108,69 +130,38 @@ static int get_offset( const void *a, const void *b )
    return (const char *)b - (const char *)a;
 }
 
-
-
-static struct x86_reg get_identity( struct translate_sse *p )
+static struct x86_reg get_const( struct translate_sse *p, unsigned id)
 {
-   struct x86_reg reg = x86_make_reg(file_XMM, 7);
-
-   if (!p->loaded_identity) {
-      p->loaded_identity = TRUE;
-      p->identity[0] = 0;
-      p->identity[1] = 0;
-      p->identity[2] = 0;
-      p->identity[3] = 1;
-
-      sse_movups(p->func, reg, 
-		 x86_make_disp(p->machine_EDI,
-			       get_offset(p, &p->identity[0])));
-   }
+   struct x86_reg reg;
+   unsigned i;
 
-   return reg;
-}
+   if(p->const_to_reg[id] >= 0)
+      return x86_make_reg(file_XMM, p->const_to_reg[id]);
 
-static struct x86_reg get_const( struct translate_sse *p, unsigned i, float v)
-{
-   struct x86_reg reg = x86_make_reg(file_XMM, 2 + i);
-
-   if (!p->loaded_const[i]) {
-      p->loaded_const[i] = TRUE;
-      p->const_value[i][0] =
-         p->const_value[i][1] =
-         p->const_value[i][2] =
-         p->const_value[i][3] = v;
-
-      sse_movups(p->func, reg,
-                 x86_make_disp(p->machine_EDI,
-                               get_offset(p, &p->const_value[i][0])));
+   for(i = 2; i < 8; ++i)
+   {
+      if(p->reg_to_const[i] < 0)
+         break;
    }
 
-   return reg;
-}
+   /* TODO: be smarter here */
+   if(i == 8)
+      --i;
 
-static struct x86_reg get_inv_127( struct translate_sse *p )
-{
-   return get_const(p, 0, 1.0f / 127.0f);
-}
+   reg = x86_make_reg(file_XMM, i);
 
-static struct x86_reg get_inv_255( struct translate_sse *p )
-{
-   return get_const(p, 1, 1.0f / 255.0f);
-}
+   if(p->reg_to_const[i] >= 0)
+      p->const_to_reg[p->reg_to_const[i]] = -1;
 
-static struct x86_reg get_inv_32767( struct translate_sse *p )
-{
-   return get_const(p, 2, 1.0f / 32767.0f);
-}
+   p->reg_to_const[i] = id;
+   p->const_to_reg[id] = i;
 
-static struct x86_reg get_inv_65535( struct translate_sse *p )
-{
-   return get_const(p, 3, 1.0f / 65535.0f);
-}
+   /* TODO: this should happen outside the loop, if possible */
+   sse_movaps(p->func, reg,
+         x86_make_disp(p->machine_EDI,
+               get_offset(p, &p->consts[id][0])));
 
-static struct x86_reg get_inv_2147483647( struct translate_sse *p )
-{
-   return get_const(p, 4, 1.0f / 2147483647.0f);
+   return reg;
 }
 
 /* load the data in a SSE2 register, padding with zeros */
@@ -247,16 +238,16 @@ static void emit_load_float32( struct translate_sse *p,
        */
       sse_movss(p->func, data, arg0);
       if(out_chans == CHANNELS_0001)
-         sse_orps(p->func, data, get_identity(p) );
+         sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
       break;
    case 2:
       /* 0 0 0 1
        * a b 0 1
        */
       if(out_chans == CHANNELS_0001)
-         sse_shufps(p->func, data, get_identity(p), SHUF(X, Y, Z, W) );
+         sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
       else if(out_chans > 2)
-         sse_movlhps(p->func, data, get_identity(p) );
+         sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
       sse_movlps(p->func, data, arg0);
       break;
    case 3:
@@ -269,7 +260,7 @@ static void emit_load_float32( struct translate_sse *p,
        */
       sse_movss(p->func, data, x86_make_disp(arg0, 8));
       if(out_chans == CHANNELS_0001)
-         sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
+         sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X,Y,Z,W) );
       sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
       sse_movlps(p->func, data, arg0);
       break;
@@ -298,15 +289,15 @@ static void emit_load_float64to32( struct translate_sse *p,
       else
          sse2_cvtsd2ss(p->func, data, data);
       if(out_chans == CHANNELS_0001)
-         sse_shufps(p->func, data, get_identity(p), SHUF(X, Y, Z, W)  );
+         sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W)  );
       break;
    case 2:
       sse2_movupd(p->func, data, arg0);
       sse2_cvtpd2ps(p->func, data, data);
       if(out_chans == CHANNELS_0001)
-         sse_shufps(p->func, data, get_identity(p), SHUF(X, Y, Z, W) );
+         sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
       else if(out_chans > 2)
-         sse_movlhps(p->func, data, get_identity(p) );
+         sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
        break;
    case 3:
       sse2_movupd(p->func, data, arg0);
@@ -318,7 +309,7 @@ static void emit_load_float64to32( struct translate_sse *p,
          sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM);
       sse_movlhps(p->func, data, tmpXMM);
       if(out_chans == CHANNELS_0001)
-         sse_orps(p->func, data, get_identity(p) );
+         sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
       break;
    case 4:
       sse2_movupd(p->func, data, arg0);
@@ -526,11 +517,11 @@ static boolean translate_attr_convert( struct translate_sse *p,
             {
             case 8:
                /* TODO: this may be inefficient due to get_identity() being used both as a float and integer register */
-               sse2_punpcklbw(p->func, dataXMM, get_identity(p));
-               sse2_punpcklbw(p->func, dataXMM, get_identity(p));
+               sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+               sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
                break;
             case 16:
-               sse2_punpcklwd(p->func, dataXMM, get_identity(p));
+               sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY));
                break;
             case 32: /* we lose precision here */
                sse2_psrld_imm(p->func, dataXMM, 1);
@@ -545,13 +536,13 @@ static boolean translate_attr_convert( struct translate_sse *p,
                switch(input_desc->channel[0].size)
                {
                case 8:
-                  factor = get_inv_255(p);
+                  factor = get_const(p, CONST_INV_255);
                   break;
                case 16:
-                  factor = get_inv_65535(p);
+                  factor = get_const(p, CONST_INV_65535);
                   break;
                case 32:
-                  factor = get_inv_2147483647(p);
+                  factor = get_const(p, CONST_INV_2147483647);
                   break;
                default:
                   assert(0);
@@ -595,13 +586,13 @@ static boolean translate_attr_convert( struct translate_sse *p,
                switch(input_desc->channel[0].size)
                {
                case 8:
-                  factor = get_inv_127(p);
+                  factor = get_const(p, CONST_INV_127);
                   break;
                case 16:
-                  factor = get_inv_32767(p);
+                  factor = get_const(p, CONST_INV_32767);
                   break;
                case 32:
-                  factor = get_inv_2147483647(p);
+                  factor = get_const(p, CONST_INV_2147483647);
                   break;
                default:
                   assert(0);
@@ -750,12 +741,12 @@ static boolean translate_attr_convert( struct translate_sse *p,
         	       sse2_psrlw_imm(p->func, dataXMM, 1);
             }
             else
-               sse2_punpcklbw(p->func, dataXMM, get_identity(p));
+               sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
             break;
          case UTIL_FORMAT_TYPE_SIGNED:
             if(input_desc->channel[0].normalized)
             {
-               sse2_movq(p->func, tmpXMM, get_identity(p));
+               sse2_movq(p->func, tmpXMM, get_const(p, CONST_IDENTITY));
                sse2_punpcklbw(p->func, tmpXMM, dataXMM);
                sse2_psllw_imm(p->func, dataXMM, 9);
                sse2_psrlw_imm(p->func, dataXMM, 8);
@@ -1020,6 +1011,7 @@ static boolean translate_attr_convert( struct translate_sse *p,
       }
       return TRUE;
    }
+
    return FALSE;
 }
 
@@ -1245,8 +1237,6 @@ static boolean build_vertex_emit( struct translate_sse *p,
    p->src_ECX     = x86_make_reg(file_REG32, reg_CX);
 
    p->func = func;
-   memset(&p->loaded_const, 0, sizeof(p->loaded_const));
-   p->loaded_identity = FALSE;
 
    x86_init_func(p->func);
 
@@ -1406,7 +1396,7 @@ static void translate_sse_release( struct translate *translate )
    x86_release_func( &p->linear_func );
    x86_release_func( &p->elt_func );
 
-   FREE(p);
+   os_free_aligned(p);
 }
 
 
@@ -1419,9 +1409,14 @@ struct translate *translate_sse2_create( const struct translate_key *key )
    if (!rtasm_cpu_has_sse())
       goto fail;
 
-   p = CALLOC_STRUCT( translate_sse );
+   p = os_malloc_aligned(sizeof(struct translate_sse), 16);
    if (p == NULL) 
       goto fail;
+   memset(p, 0, sizeof(*p));
+
+   memcpy(p->consts, consts, sizeof(consts));
+   memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const));
+   memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg));
 
    p->translate.key = *key;
    p->translate.release = translate_sse_release;
-- 
cgit v1.2.3


From 8e632666af494219c77072056e8ca0e9cd09f5fa Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <wallbraker@gmail.com>
Date: Sun, 22 Aug 2010 19:58:57 +0200
Subject: translate_sse: add R32G32B32A32_FLOAT -> X8X8X8X8_UNORM for EMIT_4UB

Changed by me to use movd instead of movss to avoid penalties.
---
 src/gallium/auxiliary/translate/translate_sse.c | 26 +++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 3fcd120ed1..5d555bbd98 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -1011,6 +1011,32 @@ static boolean translate_attr_convert( struct translate_sse *p,
       }
       return TRUE;
    }
+   /* special case for draw's EMIT_4UB (RGBA) and EMIT_4UB_BGRA */
+   else if((x86_target_caps(p->func) & X86_SSE2) &&
+         a->input_format == PIPE_FORMAT_R32G32B32A32_FLOAT && (0
+               || a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM
+               || a->output_format == PIPE_FORMAT_R8G8B8A8_UNORM
+         ))
+   {
+      struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+
+      /* load */
+      sse_movups(p->func, dataXMM, src);
+
+      if (a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM)
+         sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3));
+
+      /* scale by 255.0 */
+      sse_mulps(p->func, dataXMM, get_const(p, CONST_255));
+
+      /* pack and emit */
+      sse2_cvtps2dq(p->func, dataXMM, dataXMM);
+      sse2_packssdw(p->func, dataXMM, dataXMM);
+      sse2_packuswb(p->func, dataXMM, dataXMM);
+      sse2_movd(p->func, dst, dataXMM);
+
+      return TRUE;
+   }
 
    return FALSE;
 }
-- 
cgit v1.2.3


From 7945e143e0110398596311842309a88a6e455703 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 22 Aug 2010 19:29:32 -0600
Subject: util: implement depth blitting in u_blit

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/util/u_blit.c | 60 ++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 17 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 6fb341eaf2..dfb142b9e1 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -42,6 +42,7 @@
 
 #include "util/u_blit.h"
 #include "util/u_draw_quad.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_sampler.h"
@@ -56,7 +57,8 @@ struct blit_state
    struct cso_context *cso;
 
    struct pipe_blend_state blend;
-   struct pipe_depth_stencil_alpha_state depthstencil;
+   struct pipe_depth_stencil_alpha_state depthstencil_keep;
+   struct pipe_depth_stencil_alpha_state depthstencil_write;
    struct pipe_rasterizer_state rasterizer;
    struct pipe_sampler_state sampler;
    struct pipe_viewport_state viewport;
@@ -66,6 +68,7 @@ struct blit_state
 
    void *vs;
    void *fs[TGSI_WRITEMASK_XYZW + 1];
+   void *fs_depth;
 
    struct pipe_resource *vbuf;  /**< quad vertices */
    unsigned vbuf_slot;
@@ -96,7 +99,11 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    ctx->blend.rt[0].colormask = PIPE_MASK_RGBA;
 
    /* no-op depth/stencil/alpha */
-   memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil));
+   memset(&ctx->depthstencil_keep, 0, sizeof(ctx->depthstencil_keep));
+   memset(&ctx->depthstencil_write, 0, sizeof(ctx->depthstencil_write));
+   ctx->depthstencil_write.depth.enabled = 1;
+   ctx->depthstencil_write.depth.writemask = 1;
+   ctx->depthstencil_write.depth.func = PIPE_FUNC_ALWAYS;
 
    /* rasterizer */
    memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
@@ -169,6 +176,9 @@ util_destroy_blit(struct blit_state *ctx)
       if (ctx->fs[i])
          pipe->delete_fs_state(pipe, ctx->fs[i]);
 
+   if (ctx->fs_depth)
+      pipe->delete_fs_state(pipe, ctx->fs_depth);
+
    pipe_resource_reference(&ctx->vbuf, NULL);
 
    FREE(ctx);
@@ -276,7 +286,7 @@ regions_overlap(int srcX0, int srcY0,
  * \param writemask  controls which channels in the dest surface are sourced
  *                   from the src surface.  Disabled channels are sourced
  *                   from (0,0,0,1).
- * XXX need some control over blitting Z and/or stencil.
+ * XXX need some control over blitting stencil.
  */
 void
 util_blit_pixels_writemask(struct blit_state *ctx,
@@ -299,7 +309,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    const int srcW = abs(srcX1 - srcX0);
    const int srcH = abs(srcY1 - srcY0);
    unsigned offset;
-   boolean overlap;
+   boolean overlap, dst_is_depth;
    float s0, t0, s1, t1;
    boolean normalized;
 
@@ -444,14 +454,15 @@ util_blit_pixels_writemask(struct blit_state *ctx,
       }
    }
 
+   dst_is_depth = util_format_is_depth_or_stencil(dst->format);
 
    assert(screen->is_format_supported(screen, sampler_view->format, ctx->internal_target,
                                       sampler_view->texture->nr_samples,
                                       PIPE_BIND_SAMPLER_VIEW, 0));
    assert(screen->is_format_supported(screen, dst->format, ctx->internal_target,
                                       dst->texture->nr_samples,
-                                      PIPE_BIND_RENDER_TARGET, 0));
-
+                                      dst_is_depth ? PIPE_BIND_DEPTH_STENCIL :
+                                                     PIPE_BIND_RENDER_TARGET, 0));
    /* save state (restored below) */
    cso_save_blend(ctx->cso);
    cso_save_depth_stencil_alpha(ctx->cso);
@@ -467,7 +478,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
-   cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
+   cso_set_depth_stencil_alpha(ctx->cso,
+                               dst_is_depth ? &ctx->depthstencil_write :
+                                              &ctx->depthstencil_keep);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
    cso_set_clip(ctx->cso, &ctx->clip);
    cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
@@ -496,22 +509,35 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    /* texture */
    cso_set_fragment_sampler_views(ctx->cso, 1, &sampler_view);
 
-   if (ctx->fs[writemask] == NULL)
-      ctx->fs[writemask] =
-         util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
-                                                 TGSI_INTERPOLATE_LINEAR,
-                                                 writemask);
-
    /* shaders */
-   cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+   if (dst_is_depth) {
+      if (ctx->fs_depth == NULL)
+         ctx->fs_depth =
+            util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D,
+                                                     TGSI_INTERPOLATE_LINEAR);
+
+      cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth);
+   } else {
+      if (ctx->fs[writemask] == NULL)
+         ctx->fs[writemask] =
+            util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
+                                                    TGSI_INTERPOLATE_LINEAR,
+                                                    writemask);
+
+      cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+   }
    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
 
    /* drawing dest */
    memset(&fb, 0, sizeof(fb));
    fb.width = dst->width;
    fb.height = dst->height;
-   fb.nr_cbufs = 1;
-   fb.cbufs[0] = dst;
+   if (dst_is_depth) {
+      fb.zsbuf = dst;
+   } else {
+      fb.nr_cbufs = 1;
+      fb.cbufs[0] = dst;
+   }
    cso_set_framebuffer(ctx->cso, &fb);
 
    /* draw quad */
@@ -644,7 +670,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
-   cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
+   cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil_keep);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
    cso_set_clip(ctx->cso, &ctx->clip);
    cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
-- 
cgit v1.2.3


From bfaa2577c6474222c79341c0d90685ed579f3414 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Mon, 23 Aug 2010 00:31:08 +0200
Subject: nvfx: support clip planes sensibly and fix them on nv30

Before, we were discarding the compiled vertex program on each
user clip plane change.

Now we compile the program as if there were 6 clip planes and
dynamically patch in an "end program" bit at the right place.

Also, nv30 should now work.
---
 src/gallium/auxiliary/util/u_dynarray.h    |   3 +
 src/gallium/drivers/nvfx/nv30_vertprog.h   |   5 +-
 src/gallium/drivers/nvfx/nvfx_context.c    |   4 +
 src/gallium/drivers/nvfx/nvfx_context.h    |   1 +
 src/gallium/drivers/nvfx/nvfx_fragprog.c   |  13 ---
 src/gallium/drivers/nvfx/nvfx_state.h      |   4 +-
 src/gallium/drivers/nvfx/nvfx_state_emit.c |  82 ++++++++++++++
 src/gallium/drivers/nvfx/nvfx_vertprog.c   | 170 ++++++++++++-----------------
 8 files changed, 166 insertions(+), 116 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_dynarray.h b/src/gallium/auxiliary/util/u_dynarray.h
index 9d1c1713a7..980cadf22d 100644
--- a/src/gallium/auxiliary/util/u_dynarray.h
+++ b/src/gallium/auxiliary/util/u_dynarray.h
@@ -106,6 +106,9 @@ util_dynarray_trim(struct util_dynarray *buf)
 #define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type)))
 #define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type)
 #define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type))
+#define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx))
+#define util_dynarray_begin(buf) ((buf)->data)
+#define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, (buf)->size))
 
 #endif /* U_DYNARRAY_H */
 
diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h
index df92469078..9a68f5c1fb 100644
--- a/src/gallium/drivers/nvfx/nv30_vertprog.h
+++ b/src/gallium/drivers/nvfx/nv30_vertprog.h
@@ -125,7 +125,7 @@
 #define NV30_VP_INST_VDEST_WRITEMASK_SHIFT      12    /*NV20*/
 #define NV30_VP_INST_VDEST_WRITEMASK_MASK      (0x0F << 12)  /*NV20*/
 #define NV30_VP_INST_DEST_SHIFT        2
-#define NV30_VP_INST_DEST_MASK        (0x0F <<  2)
+#define NV30_VP_INST_DEST_MASK        (0x1F <<  2)
 #  define NV30_VP_INST_DEST_POS  0
 #  define NV30_VP_INST_DEST_BFC0  1
 #  define NV30_VP_INST_DEST_BFC1  2
@@ -133,7 +133,8 @@
 #  define NV30_VP_INST_DEST_COL1  4
 #  define NV30_VP_INST_DEST_FOGC  5
 #  define NV30_VP_INST_DEST_PSZ   6
-#  define NV30_VP_INST_DEST_TC(n)  (8+n)
+#  define NV30_VP_INST_DEST_TC(n)  (8+(n))
+#  define NV30_VP_INST_DEST_CLP(n) (17 + (n))
 
 /* Useful to split the source selection regs into their pieces */
 #define NV30_VP_SRC0_HIGH_SHIFT                                                6
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 80b36fb7b9..2f775f92cf 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -75,6 +75,10 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
 	screen->base.channel->user_private = nvfx;
 
 	nvfx->is_nv4x = screen->is_nv4x;
+	/* TODO: it seems that nv30 might have fixed-function clipping usable with vertex programs.
+	 * However, my code for that doesn't work, so use vp clipping for all cards, which works.
+	 */
+	nvfx->use_vp_clipping = TRUE;
 
 	nvfx_init_query_functions(nvfx);
 	nvfx_init_surface_functions(nvfx);
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
index 2134f3c386..680f4c6ce0 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -134,6 +134,7 @@ struct nvfx_context {
 	struct nvfx_screen *screen;
 
 	unsigned is_nv4x; /* either 0 or ~0 */
+	boolean use_vp_clipping;
 
 	struct draw_context *draw;
 	struct blitter_context* blitter;
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index a7e43b1513..23a85c9342 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -1468,19 +1468,6 @@ update:
 			nvfx->hw_pointsprite_control = pointsprite_control;
 		}
 	}
-
-	if(nvfx->is_nv4x)
-	{
-		unsigned vp_output = vp->or | fp->or;
-
-		if(vp_output != nvfx->hw_vp_output)
-		{
-			WAIT_RING(chan, 2);
-			OUT_RING(chan, RING_3D(NV40TCL_VP_RESULT_EN, 1));
-			OUT_RING(chan, vp_output);
-			nvfx->hw_vp_output = vp_output;
-		}
-	}
 }
 
 void
diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h
index 3795191918..e9c1f2c26d 100644
--- a/src/gallium/drivers/nvfx/nvfx_state.h
+++ b/src/gallium/drivers/nvfx/nvfx_state.h
@@ -24,8 +24,6 @@ struct nvfx_vertex_program {
 
 	boolean translated;
 
-	struct pipe_clip_state ucp;
-
 	struct nvfx_vertex_program_exec *insns;
 	unsigned nr_insns;
 	struct nvfx_vertex_program_data *consts;
@@ -42,7 +40,7 @@ struct nvfx_vertex_program {
 
 	uint32_t ir;
 	uint32_t or;
-	uint32_t clip_ctrl;
+	int clip_nr;
 
 	struct util_dynarray branch_relocs;
 	struct util_dynarray const_relocs;
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
index bd89a385d7..c43a75aaa2 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -90,6 +90,74 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
 	if(dirty & NVFX_NEW_STIPPLE)
 		nvfx_state_stipple_validate(nvfx);
 
+       if(nvfx->dirty & NVFX_NEW_UCP)
+	{
+		unsigned enables[7] =
+		{
+				0,
+				NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0,
+				NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1,
+				NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2,
+				NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3,
+				NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4,
+				NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5,
+		};
+
+		if(!nvfx->use_vp_clipping)
+		{
+			WAIT_RING(chan, 2);
+			OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
+			OUT_RING(chan, 0);
+
+			WAIT_RING(chan, 6 * 4 + 1);
+			OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANE_A(0), nvfx->clip.nr * 4));
+			OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4);
+		}
+
+		WAIT_RING(chan, 2);
+		OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
+		OUT_RING(chan, enables[nvfx->clip.nr]);
+	}
+
+	if(nvfx->use_vp_clipping && (nvfx->dirty & (NVFX_NEW_UCP | NVFX_NEW_VERTPROG)))
+	{
+		unsigned i;
+		struct nvfx_vertex_program* vp = nvfx->vertprog;
+		if(nvfx->clip.nr != vp->clip_nr)
+		{
+			unsigned idx;
+			WAIT_RING(chan, 14);
+
+			/* remove last instruction bit */
+			if(vp->clip_nr >= 0)
+			{
+				idx = vp->nr_insns - 7 + vp->clip_nr;
+				OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1));
+				OUT_RING(chan,  vp->exec->start + idx);
+				OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4));
+				OUT_RINGp (chan, vp->insns[idx].data, 4);
+			}
+
+			 /* set last instruction bit */
+			idx = vp->nr_insns - 7 + nvfx->clip.nr;
+			OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1));
+			OUT_RING(chan,  vp->exec->start + idx);
+			OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4));
+			OUT_RINGp(chan, vp->insns[idx].data, 3);
+			OUT_RING(chan, vp->insns[idx].data[3] | 1);
+			vp->clip_nr = nvfx->clip.nr;
+		}
+
+		// TODO: only do this for the ones changed
+		WAIT_RING(chan, 6 * 6);
+		for(i = 0; i < nvfx->clip.nr; ++i)
+		{
+			OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_CONST_ID, 5));
+			OUT_RING(chan, vp->data->start + i);
+			OUT_RINGp (chan, nvfx->clip.ucp[i], 4);
+		}
+	}
+
 	if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE))
 	{
 		nvfx_fragprog_validate(nvfx);
@@ -97,6 +165,20 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
 			flush_tex_cache = TRUE; // TODO: do we need this?
 	}
 
+	if(nvfx->is_nv4x)
+	{
+		unsigned vp_output = nvfx->vertprog->or | nvfx->hw_fragprog->or;
+		vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6);
+
+		if(vp_output != nvfx->hw_vp_output)
+		{
+			WAIT_RING(chan, 2);
+			OUT_RING(chan, RING_3D(NV40TCL_VP_RESULT_EN, 1));
+			OUT_RING(chan, vp_output);
+			nvfx->hw_vp_output = vp_output;
+		}
+	}
+
 	if(all_swizzled >= 0)
 		nvfx_framebuffer_validate(nvfx, all_swizzled);
 
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
index 3b8d3853b7..ea7e88c561 100644
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -29,8 +29,6 @@
 #include "nv30_vertprog.h"
 #include "nv40_vertprog.h"
 
-#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
-
 struct nvfx_loop_entry
 {
 	unsigned brk_target;
@@ -205,52 +203,33 @@ emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot
 		break;
 	case NVFXSR_OUTPUT:
 		/* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */
-		switch (dst.index) {
-		case NVFX_VP_INST_DEST_CLIP(0):
-			vp->or |= (1 << 6);
-			vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0;
-			dst.index = NVFX_VP(INST_DEST_FOGC);
-			break;
-		case NVFX_VP_INST_DEST_CLIP(1):
-			vp->or |= (1 << 7);
-			vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1;
-			dst.index = NVFX_VP(INST_DEST_FOGC);
-			break;
-		case NVFX_VP_INST_DEST_CLIP(2):
-			vp->or |= (1 << 8);
-			vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2;
-			dst.index = NVFX_VP(INST_DEST_FOGC);
-			break;
-		case NVFX_VP_INST_DEST_CLIP(3):
-			vp->or |= (1 << 9);
-			vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3;
-			dst.index = NVFX_VP(INST_DEST_PSZ);
-			break;
-		case NVFX_VP_INST_DEST_CLIP(4):
-			vp->or |= (1 << 10);
-			vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4;
-			dst.index = NVFX_VP(INST_DEST_PSZ);
-			break;
-		case NVFX_VP_INST_DEST_CLIP(5):
-			vp->or |= (1 << 11);
-			vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5;
-			dst.index = NVFX_VP(INST_DEST_PSZ);
-			break;
-		default:
-			if(nvfx->is_nv4x) {
-				/* we don't need vp->or on nv3x
-				 * texcoords are handled by fragment program
-				 */
-				switch (dst.index) {
-				case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
-				case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
-				case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
-				case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
-				case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
-				case NV40_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break;
-				}
+		if(nvfx->is_nv4x) {
+			switch (dst.index) {
+			case NV30_VP_INST_DEST_CLP(0):
+				dst.index = NVFX_VP(INST_DEST_FOGC);
+				break;
+			case NV30_VP_INST_DEST_CLP(1):
+				dst.index = NVFX_VP(INST_DEST_FOGC);
+				break;
+			case NV30_VP_INST_DEST_CLP(2):
+				dst.index = NVFX_VP(INST_DEST_FOGC);
+				break;
+			case NV30_VP_INST_DEST_CLP(3):
+				dst.index = NVFX_VP(INST_DEST_PSZ);
+				break;
+			case NV30_VP_INST_DEST_CLP(4):
+				dst.index = NVFX_VP(INST_DEST_PSZ);
+				break;
+			case NV30_VP_INST_DEST_CLP(5):
+				dst.index = NVFX_VP(INST_DEST_PSZ);
+				break;
+			case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+			case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+			case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+			case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+			case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
+			case NV40_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break;
 			}
-			break;
 		}
 
 		if(!nvfx->is_nv4x) {
@@ -914,6 +893,13 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
 	vpc->nvfx = nvfx;
 	vpc->vp = vp;
 
+	/* reserve space for ucps */
+	if(nvfx->use_vp_clipping)
+	{
+		for(i = 0; i < 6; ++i)
+			constant(vpc, -1, 0, 0, 0, 0);
+	}
+
 	if (!nvfx_vertprog_prepare(nvfx, vpc)) {
 		FREE(vpc);
 		return;
@@ -923,7 +909,8 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
 	 * planes are enabled.  We need to append code to the vtxprog
 	 * to handle clip planes later.
 	 */
-	if (vp->ucp.nr)  {
+	/* TODO: maybe support patching this depending on whether there are ucps: not sure if it really matters much */
+	if (nvfx->use_vp_clipping)  {
 		vpc->r_result[vpc->hpos_idx] = temp(vpc);
 		vpc->r_temps_discard = 0;
 	}
@@ -994,34 +981,39 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
 	}
 
 	/* Insert code to handle user clip planes */
-	for (i = 0; i < vp->ucp.nr; i++) {
-		struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT,
-						NVFX_VP_INST_DEST_CLIP(i));
-		struct nvfx_src ceqn = nvfx_src(constant(vpc, -1,
-						 nvfx->clip.ucp[i][0],
-						 nvfx->clip.ucp[i][1],
-						 nvfx->clip.ucp[i][2],
-						 nvfx->clip.ucp[i][3]));
-		struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);
-		unsigned mask;
+	if(nvfx->use_vp_clipping)
+	{
+		for (i = 0; i < 6; i++) {
+			struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i));
+			struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, i));
+			struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);
+			unsigned mask;
 
-		switch (i) {
-		case 0: case 3: mask = NVFX_VP_MASK_Y; break;
-		case 1: case 4: mask = NVFX_VP_MASK_Z; break;
-		case 2: case 5: mask = NVFX_VP_MASK_W; break;
-		default:
-			NOUVEAU_ERR("invalid clip dist #%d\n", i);
-			goto out_err;
-		}
+			if(nvfx->is_nv4x)
+			{
+				switch (i) {
+				case 0: case 3: mask = NVFX_VP_MASK_Y; break;
+				case 1: case 4: mask = NVFX_VP_MASK_Z; break;
+				case 2: case 5: mask = NVFX_VP_MASK_W; break;
+				default:
+					NOUVEAU_ERR("invalid clip dist #%d\n", i);
+					goto out_err;
+				}
+			}
+			else
+				mask = NVFX_VP_MASK_X;
 
-		nvfx_vp_emit(vpc, arith(VEC, DP4, cdst, mask, htmp, ceqn, none));
+			nvfx_vp_emit(vpc, arith(VEC, DP4, cdst, mask, htmp, ceqn, none));
+		}
 	}
+	else
+	{
+		if(vp->nr_insns)
+			vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
 
-	//vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
-
-	/* Append NOP + END instruction for branches to the end of the program */
-	nvfx_vp_emit(vpc, arith(VEC, NOP, none.reg, 0, none, none, none));
-        vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST | 0x1000;
+		nvfx_vp_emit(vpc, arith(VEC, NOP, none.reg, 0, none, none, none));
+		vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
+	}
 
 	if(debug_get_option_nvfx_dump_vp())
 	{
@@ -1034,6 +1026,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
 		debug_printf("\n");
 	}
 
+	vp->clip_nr = -1;
 	vp->exec_start = -1;
 	vp->translated = TRUE;
 out_err:
@@ -1063,13 +1056,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 	if (nvfx->render_mode == HW) {
 		vp = nvfx->vertprog;
 		constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
-
-		// TODO: ouch! can't we just use constant slots for these?!
-		if ((nvfx->dirty & NVFX_NEW_UCP) ||
-		    memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) {
-			nvfx_vertprog_destroy(nvfx, vp);
-			memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp));
-		}
 	} else {
 		vp = nvfx->swtnl.vertprog;
 		constbuf = NULL;
@@ -1169,7 +1155,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 		vp->exec_start = vp->exec->start;
 	}
 
-	if (vp->nr_consts && vp->data_start != vp->data->start) {
+	if (vp->data_start != vp->data->start) {
 		for(unsigned i = 0; i < vp->const_relocs.size; i += sizeof(struct nvfx_relocation))
 		{
 			struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->const_relocs.data + i);
@@ -1182,6 +1168,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 		}
 
 		vp->data_start = vp->data->start;
+		upload_code = TRUE;
 	}
 
 	/* Update + Upload constant values */
@@ -1191,7 +1178,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 		if (constbuf)
 			map = (float*)nvfx_buffer(constbuf)->data;
 
-		for (i = 0; i < vp->nr_consts; i++) {
+		for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) {
 			struct nvfx_vertex_program_data *vpd = &vp->consts[i];
 
 			if (vpd->index >= 0) {
@@ -1217,9 +1204,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 			BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4);
 			OUT_RINGp (chan, vp->insns[i].data, 4);
 		}
+		vp->clip_nr = -1;
 	}
 
-	if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
+	if(nvfx->dirty & (NVFX_NEW_VERTPROG))
 	{
 		WAIT_RING(chan, 6);
 		OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1));
@@ -1228,8 +1216,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 			OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 1));
 			OUT_RING(chan, vp->ir);
 		}
-		OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
-		OUT_RING(chan, vp->clip_ctrl);
 	}
 
 	return TRUE;
@@ -1238,27 +1224,15 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 void
 nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
 {
-	vp->translated = FALSE;
-
-	if (vp->nr_insns) {
+	if (vp->nr_insns)
 		FREE(vp->insns);
-		vp->insns = NULL;
-		vp->nr_insns = 0;
-	}
 
-	if (vp->nr_consts) {
+	if (vp->nr_consts)
 		FREE(vp->consts);
-		vp->consts = NULL;
-		vp->nr_consts = 0;
-	}
 
 	nouveau_resource_free(&vp->exec);
-	vp->exec_start = 0;
 	nouveau_resource_free(&vp->data);
-	vp->data_start = 0;
-	vp->data_start_min = 0;
 
-	vp->ir = vp->or = vp->clip_ctrl = 0;
 	util_dynarray_fini(&vp->branch_relocs);
 	util_dynarray_fini(&vp->const_relocs);
 }
-- 
cgit v1.2.3


From c2f074d8a4b93f3f3a81311f9a114b11bc5f80d8 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 23 Aug 2010 17:55:16 +0200
Subject: util: fix util_fill_rect to take util_color instead of u32 param

util_fill_rect could not handle formats with more than 32 bits,
since the fill color was a uint32_t value. Fix this by using
a util_color union instead, and also expand the union so it
works with formats which have up to 256 bits (the max of any
format currently defined).
---
 src/gallium/auxiliary/util/u_pack_color.h |  8 ++++-
 src/gallium/auxiliary/util/u_rect.c       | 51 ++++++++++++++++++++-----------
 src/gallium/auxiliary/util/u_rect.h       |  3 +-
 src/gallium/auxiliary/util/u_surface.c    | 46 +++-------------------------
 4 files changed, 47 insertions(+), 61 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 5f113f742b..aae8b8bdf1 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -42,12 +42,18 @@
 #include "util/u_math.h"
 
 
-
+/**
+ * Helper union for packing pixel values.
+ * Will often contain values in formats which are too complex to be described
+ * in simple terms, hence might just effectively contain a number of bytes.
+ * Must be big enough to hold data for all formats (currently 256 bits).
+ */
 union util_color {
    ubyte ub;
    ushort us;
    uint ui;
    float f[4];
+   double d[4];
 };
 
 /**
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index 9bbcf1c8c4..56fcfac069 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -32,6 +32,7 @@
 
 #include "util/u_format.h"
 #include "util/u_rect.h"
+#include "util/u_pack_color.h"
 
 
 /**
@@ -94,7 +95,7 @@ util_fill_rect(ubyte * dst,
                unsigned dst_y,
                unsigned width,
                unsigned height,
-               uint32_t value)
+               union util_color *uc)
 {
    unsigned i, j;
    unsigned width_size;
@@ -110,40 +111,54 @@ util_fill_rect(ubyte * dst,
    dst_y /= blockheight;
    width = (width + blockwidth - 1)/blockwidth;
    height = (height + blockheight - 1)/blockheight;
-   
+
    dst += dst_x * blocksize;
    dst += dst_y * dst_stride;
    width_size = width * blocksize;
-   
+
    switch (blocksize) {
    case 1:
       if(dst_stride == width_size)
-	 memset(dst, (ubyte) value, height * width_size);
+         memset(dst, uc->ub, height * width_size);
       else {
-	 for (i = 0; i < height; i++) {
-	    memset(dst, (ubyte) value, width_size);
-	    dst += dst_stride;
-	 }
+         for (i = 0; i < height; i++) {
+            memset(dst, uc->ub, width_size);
+            dst += dst_stride;
+         }
       }
       break;
    case 2:
       for (i = 0; i < height; i++) {
-	 uint16_t *row = (uint16_t *)dst;
-	 for (j = 0; j < width; j++)
-	    *row++ = (uint16_t) value;
-	 dst += dst_stride;
+         uint16_t *row = (uint16_t *)dst;
+         for (j = 0; j < width; j++)
+            *row++ = uc->us;
+         dst += dst_stride;
       }
       break;
    case 4:
       for (i = 0; i < height; i++) {
-	 uint32_t *row = (uint32_t *)dst;
-	 for (j = 0; j < width; j++)
-	    *row++ = value;
-	 dst += dst_stride;
+         uint32_t *row = (uint32_t *)dst;
+         for (j = 0; j < width; j++)
+            *row++ = uc->ui;
+         dst += dst_stride;
+      }
+      break;
+   case 8:
+   case 12:
+   case 16:
+   case 24:
+   case 32:
+      for (i = 0; i < height; i++) {
+         ubyte *row = dst;
+         for (j = 0; j < width; j++) {
+            memcpy(row, uc, blocksize);
+            row += blocksize;
+         }
+         dst += dst_stride;
       }
       break;
    default:
-	 assert(0);
-	 break;
+      assert(0);
+      break;
    }
 }
diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index 40d57e662d..deb00cc80c 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -36,6 +36,7 @@
 
 
 #include "pipe/p_format.h"
+#include "util/u_pack_color.h"
 
 
 extern void
@@ -47,7 +48,7 @@ util_copy_rect(ubyte * dst, enum pipe_format format,
 extern void
 util_fill_rect(ubyte * dst, enum pipe_format format,
                unsigned dst_stride, unsigned dst_x, unsigned dst_y,
-               unsigned width, unsigned height, uint32_t value);
+               unsigned width, unsigned height, union util_color *uc);
 
 
 #endif /* U_RECT_H */
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index cab7691c70..af99163b2e 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -216,7 +216,7 @@ util_clear_render_target(struct pipe_context *pipe,
    assert(dst->texture);
    if (!dst->texture)
       return;
-   util_pack_color(rgba, dst->texture->format, &uc);
+
    dst_trans = pipe_get_transfer(pipe,
 				 dst->texture,
 				 dst->face,
@@ -232,46 +232,10 @@ util_clear_render_target(struct pipe_context *pipe,
    if (dst_map) {
       assert(dst_trans->stride > 0);
 
-      switch (util_format_get_blocksize(dst->texture->format)) {
-      case 1:
-      case 2:
-      case 4:
-         util_pack_color(rgba, dst->texture->format, &uc);
-         util_fill_rect(dst_map, dst->texture->format,
-                        dst_trans->stride,
-                        0, 0, width, height, uc.ui);
-         break;
-      case 8:
-      {
-	 /* expand the 4-byte clear value to an 8-byte value */
-	 /* should probably not convert back from ubyte but not
-	    sure what this code really achieved since it doesn't even
-	    check for format type... */
-	 ushort *row = (ushort *) dst_map;
-	 ushort val0 = UBYTE_TO_USHORT((uc.ui >>  0) & 0xff);
-	 ushort val1 = UBYTE_TO_USHORT((uc.ui >>  8) & 0xff);
-	 ushort val2 = UBYTE_TO_USHORT((uc.ui >> 16) & 0xff);
-	 ushort val3 = UBYTE_TO_USHORT((uc.ui >> 24) & 0xff);
-	 unsigned i, j;
-	 val0 = (val0 << 8) | val0;
-	 val1 = (val1 << 8) | val1;
-	 val2 = (val2 << 8) | val2;
-	 val3 = (val3 << 8) | val3;
-	 for (i = 0; i < height; i++) {
-	    for (j = 0; j < width; j++) {
-	       row[j*4+0] = val0;
-	       row[j*4+1] = val1;
-	       row[j*4+2] = val2;
-	       row[j*4+3] = val3;
-	    }
-	    row += dst_trans->stride/2;
-	 }
-      }
-      break;
-      default:
-         assert(0);
-         break;
-      }
+      util_pack_color(rgba, dst->texture->format, &uc);
+      util_fill_rect(dst_map, dst->texture->format,
+                     dst_trans->stride,
+                     0, 0, width, height, &uc);
    }
 
    pipe->transfer_unmap(pipe, dst_trans);
-- 
cgit v1.2.3


From 4b2b5f8e30347ce0a1818524f8825335d47eb5ca Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Sun, 22 Aug 2010 18:40:48 -0600
Subject: tgsi: fix false CondStackTop==0 assertion

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 298f3d0a8b..0757f05dfa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3239,6 +3239,8 @@ exec_instruction(
 
          if (mach->CallStackTop == 0) {
             /* returning from main() */
+            mach->CondStackTop = 0;
+            mach->LoopStackTop = 0;
             *pc = -1;
             return;
          }
@@ -3767,6 +3769,9 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
    }
 #endif
 
+   /* Strictly speaking, these assertions aren't really needed but they
+    * can potentially catch some bugs in the control flow code.
+    */
    assert(mach->CondStackTop == 0);
    assert(mach->LoopStackTop == 0);
    assert(mach->ContStackTop == 0);
-- 
cgit v1.2.3


From c0eb479e0782c063a1a781f81b99a18ef649e9ef Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Mon, 23 Aug 2010 21:43:11 +0200
Subject: auxiliary: fix nvfx/nv50 primitive splitting for line loops

s->close_first was on the wrong side of the inequality: the extra closing
vertex has to count against max_verts instead of extending it.

Caught by Blender.
Thanks to AndrewR for reporting this.
---
 src/gallium/auxiliary/util/u_split_prim.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
index 206e1ec311..e63a7c1fad 100644
--- a/src/gallium/auxiliary/util/u_split_prim.h
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -48,7 +48,7 @@ util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
       }
    }
 
-   if (s->p_start + s->close_first + max_verts >= s->p_end) {
+   if ((s->p_end - s->p_start) + s->close_first <= max_verts) {
       s->emit(s->priv, s->p_start, s->p_end - s->p_start);
       if (s->close_first)
          s->emit(s->priv, s->start, 1);
-- 
cgit v1.2.3


From d1e6b31cb848ed79dd82849f277ab07c9bcdd707 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Mon, 23 Aug 2010 23:21:18 +0200
Subject: translate_sse: fix x86-64

---
 src/gallium/auxiliary/translate/translate_sse.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 5d555bbd98..92dcd408c9 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -1224,6 +1224,7 @@ static boolean incr_inputs( struct translate_sse *p,
       }
    } 
    else {
+      x64_rexw(p->func);
       x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size));
    }
    
-- 
cgit v1.2.3


From 6b6b45403740144fa5ef2ce362a4c5b9fd0066b6 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Tue, 24 Aug 2010 04:16:42 +0200
Subject: translate_sse: clear state for each function emission

Fixes #29771.
---
 src/gallium/auxiliary/translate/translate_sse.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 92dcd408c9..f8bf5b4669 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -1255,6 +1255,9 @@ static boolean build_vertex_emit( struct translate_sse *p,
    int fixup, label;
    unsigned j;
 
+   memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const));
+   memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg));
+
    p->tmp_EAX       = x86_make_reg(file_REG32, reg_AX);
    p->idx_ESI       = x86_make_reg(file_REG32, reg_SI);
    p->outbuf_EBX    = x86_make_reg(file_REG32, reg_BX);
@@ -1440,10 +1443,7 @@ struct translate *translate_sse2_create( const struct translate_key *key )
    if (p == NULL) 
       goto fail;
    memset(p, 0, sizeof(*p));
-
    memcpy(p->consts, consts, sizeof(consts));
-   memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const));
-   memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg));
 
    p->translate.key = *key;
    p->translate.release = translate_sse_release;
-- 
cgit v1.2.3


From 72ae834fa16a32cc58ae7a93e74f6e11822fcac0 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 24 Aug 2010 23:50:45 -0700
Subject: gallivm: Include missing header in lp_bld_pack.h.

Include p_compiler.h for boolean symbol.
---
 src/gallium/auxiliary/gallivm/lp_bld_pack.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index e470082b97..e947b90d16 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -37,6 +37,8 @@
 #define LP_BLD_PACK_H
 
 
+#include "pipe/p_compiler.h"
+
 #include "gallivm/lp_bld.h"
 
 
-- 
cgit v1.2.3


From deffeba17204c249cac698a516a210e364d2cf55 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 24 Aug 2010 23:53:26 -0700
Subject: gallivm: Include missing header in lp_bld_sample.h.

Include p_format.h for enum pipe_format symbol.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 5b8f478094..aff7bb2a4d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -36,6 +36,8 @@
 #define LP_BLD_SAMPLE_H
 
 
+#include "pipe/p_format.h"
+
 #include "gallivm/lp_bld.h"
 
 struct pipe_resource;
-- 
cgit v1.2.3


From 4f024e0f642f4f743e4d051ec71c00e45bfd361f Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Wed, 25 Aug 2010 14:02:12 +0800
Subject: draw: Add draw_set_index_buffer and others.

This commit adds draw_set_index_buffer, draw_set_mapped_index_buffer,
and draw_vbo.  The idea behind the new functions is that an index buffer
should be a state.

draw_arrays and draw_set_mapped_element_buffer are preserved, but the
latter will be removed soon.
---
 src/gallium/auxiliary/draw/draw_context.c       | 35 +++++++++--
 src/gallium/auxiliary/draw/draw_context.h       |  9 +++
 src/gallium/auxiliary/draw/draw_private.h       |  2 +
 src/gallium/auxiliary/draw/draw_pt.c            | 83 ++++++++++++++++++-------
 src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h |  6 +-
 5 files changed, 104 insertions(+), 31 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index d118a8db52..c2b7a441bd 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -496,6 +496,27 @@ void draw_set_render( struct draw_context *draw,
 }
 
 
+void
+draw_set_index_buffer(struct draw_context *draw,
+                      const struct pipe_index_buffer *ib)
+{
+   if (ib)
+      memcpy(&draw->pt.index_buffer, ib, sizeof(draw->pt.index_buffer));
+   else
+      memset(&draw->pt.index_buffer, 0, sizeof(draw->pt.index_buffer));
+}
+
+
+/**
+ * Tell drawing context where to find mapped index/element buffer.
+ */
+void
+draw_set_mapped_index_buffer(struct draw_context *draw,
+                             const void *elements)
+{
+    draw->pt.user.elts = elements;
+}
+
 
 /**
  * Tell the drawing context about the index/element buffer to use
@@ -515,8 +536,13 @@ draw_set_mapped_element_buffer_range( struct draw_context *draw,
                                       unsigned max_index,
                                       const void *elements )
 {
+   struct pipe_index_buffer ib;
+
+   memset(&ib, 0, sizeof(ib));
+   ib.index_size = eltSize;
+   draw_set_index_buffer(draw, &ib);
+
    draw->pt.user.elts = elements;
-   draw->pt.user.eltSize = eltSize;
    draw->pt.user.eltBias = eltBias;
    draw->pt.user.min_index = min_index;
    draw->pt.user.max_index = max_index;
@@ -529,11 +555,8 @@ draw_set_mapped_element_buffer( struct draw_context *draw,
                                 int eltBias,
                                 const void *elements )
 {
-   draw->pt.user.elts = elements;
-   draw->pt.user.eltSize = eltSize;
-   draw->pt.user.eltBias = eltBias;
-   draw->pt.user.min_index = 0;
-   draw->pt.user.max_index = 0xffffffff;
+   draw_set_mapped_element_buffer_range(draw,
+         eltSize, eltBias, 0, 0xffffffff, elements);
 }
 
  
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 116716af6f..e9f3237dda 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -160,6 +160,12 @@ void draw_set_vertex_elements(struct draw_context *draw,
 			      unsigned count,
                               const struct pipe_vertex_element *elements);
 
+void draw_set_index_buffer(struct draw_context *draw,
+                           const struct pipe_index_buffer *ib);
+
+void draw_set_mapped_index_buffer(struct draw_context *draw,
+                                  const void *elements);
+
 void
 draw_set_mapped_element_buffer_range( struct draw_context *draw,
                                       unsigned eltSize,
@@ -196,6 +202,9 @@ draw_set_so_state(struct draw_context *draw,
  * draw_pt.c 
  */
 
+void draw_vbo(struct draw_context *draw,
+              const struct pipe_draw_info *info);
+
 void draw_arrays(struct draw_context *draw, unsigned prim,
 		 unsigned start, unsigned count);
 
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 854c45f060..7bc3923692 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -149,6 +149,8 @@ struct draw_context
       struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
       unsigned nr_vertex_elements;
 
+      struct pipe_index_buffer index_buffer;
+
       /* user-space vertex data, buffers */
       struct {
          /** vertex element/index buffer (ex: glDrawElements) */
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index feacd8258b..8db0d73662 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -39,6 +39,7 @@
 #include "util/u_math.h"
 #include "util/u_prim.h"
 #include "util/u_format.h"
+#include "util/u_draw.h"
 
 
 DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
@@ -189,24 +190,29 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
       uint ii = 0;
       uint j;
 
-      if (draw->pt.user.elts) {
+      if (draw->pt.user.eltSize) {
+         const char *elts;
+
          /* indexed arrays */
+         elts = (const char *) draw->pt.user.elts;
+         elts += draw->pt.index_buffer.offset;
+
          switch (draw->pt.user.eltSize) {
          case 1:
             {
-               const ubyte *elem = (const ubyte *) draw->pt.user.elts;
+               const ubyte *elem = (const ubyte *) elts;
                ii = elem[start + i];
             }
             break;
          case 2:
             {
-               const ushort *elem = (const ushort *) draw->pt.user.elts;
+               const ushort *elem = (const ushort *) elts;
                ii = elem[start + i];
             }
             break;
          case 4:
             {
-               const uint *elem = (const uint *) draw->pt.user.elts;
+               const uint *elem = (const uint *) elts;
                ii = elem[start + i];
             }
             break;
@@ -292,17 +298,9 @@ draw_arrays(struct draw_context *draw, unsigned prim,
 
 
 /**
- * Draw vertex arrays.
- * This is the main entrypoint into the drawing module.
- * If drawing an indexed primitive, the draw_set_mapped_element_buffer_range()
- * function should have already been called to specify the element/index buffer
- * information.
- *
- * \param prim  one of PIPE_PRIM_x
- * \param start  index of first vertex to draw
- * \param count  number of vertices to draw
- * \param startInstance  number for the first primitive instance (usually 0).
- * \param instanceCount  number of instances to draw (1=non-instanced)
+ * Instanced drawing.
+ * draw_set_mapped_element_buffer must be called before calling this function.
+ * \sa draw_vbo
  */
 void
 draw_arrays_instanced(struct draw_context *draw,
@@ -312,10 +310,49 @@ draw_arrays_instanced(struct draw_context *draw,
                       unsigned startInstance,
                       unsigned instanceCount)
 {
-   unsigned reduced_prim = u_reduced_prim(mode);
+   struct pipe_draw_info info;
+
+   util_draw_init_info(&info);
+
+   info.mode = mode;
+   info.start = start;
+   info.count = count;
+   info.start_instance = startInstance;
+   info.instance_count = instanceCount;
+
+   info.indexed = (draw->pt.user.elts != NULL);
+   info.index_bias = draw->pt.user.eltBias;
+   info.min_index = draw->pt.user.min_index;
+   info.max_index = draw->pt.user.max_index;
+
+   draw_vbo(draw, &info);
+}
+
+
+/**
+ * Draw vertex arrays.
+ * This is the main entrypoint into the drawing module.  If drawing an indexed
+ * primitive, the draw_set_index_buffer() and draw_set_mapped_index_buffer()
+ * functions should have already been called to specify the element/index
+ * buffer information.
+ */
+void
+draw_vbo(struct draw_context *draw,
+         const struct pipe_draw_info *info)
+{
+   unsigned reduced_prim = u_reduced_prim(info->mode);
    unsigned instance;
 
-   assert(instanceCount > 0);
+   assert(info->instance_count > 0);
+   if (info->indexed)
+      assert(draw->pt.user.elts);
+
+   draw->pt.user.eltSize =
+      (info->indexed) ? draw->pt.index_buffer.index_size : 0;
+
+   draw->pt.user.eltBias = info->index_bias;
+   draw->pt.user.min_index = info->min_index;
+   draw->pt.user.max_index = info->max_index;
 
    if (reduced_prim != draw->reduced_prim) {
       draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
@@ -323,8 +360,8 @@ draw_arrays_instanced(struct draw_context *draw,
    }
 
    if (0)
-      debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
-                   mode, start, count);
+      debug_printf("draw_vbo(mode=%u start=%u count=%u):\n",
+                   info->mode, info->start, info->count);
 
    if (0)
       tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
@@ -352,10 +389,10 @@ draw_arrays_instanced(struct draw_context *draw,
    }
 
    if (0)
-      draw_print_arrays(draw, mode, start, MIN2(count, 20));
+      draw_print_arrays(draw, info->mode, info->start, MIN2(info->count, 20));
 
-   for (instance = 0; instance < instanceCount; instance++) {
-      draw->instance_id = instance + startInstance;
-      draw_pt_arrays(draw, mode, start, count);
+   for (instance = 0; instance < info->instance_count; instance++) {
+      draw->instance_id = instance + info->start_instance;
+      draw_pt_arrays(draw, info->mode, info->start, info->count);
    }
 }
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
index 4bb57b1493..3f66f962e1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -38,7 +38,8 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
                                     unsigned istart, unsigned icount)
 {
    struct draw_context *draw = vsplit->draw;
-   const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts;
+   const ELT_TYPE *ib = (const ELT_TYPE *)
+      ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
    const unsigned min_index = draw->pt.user.min_index;
    const unsigned max_index = draw->pt.user.max_index;
    const int elt_bias = draw->pt.user.eltBias;
@@ -119,7 +120,8 @@ CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
                                         boolean close, unsigned iclose)
 {
    struct draw_context *draw = vsplit->draw;
-   const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts;
+   const ELT_TYPE *ib = (const ELT_TYPE *)
+      ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
    const int ibias = draw->pt.user.eltBias;
    unsigned i;
 
-- 
cgit v1.2.3


From 22f6026324f63c142925244ff575fefc29a90389 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Wed, 25 Aug 2010 15:11:03 +0800
Subject: gallium: Use draw_set_index_buffer and others.

Update all drivers to use draw_set_index_buffer,
draw_set_mapped_index_buffer, and draw_vbo.  Remove
draw_set_mapped_element_buffer and draw_set_mapped_element_buffer_range.
---
 src/gallium/auxiliary/draw/draw_context.c        | 42 ----------------------
 src/gallium/auxiliary/draw/draw_context.h        | 13 -------
 src/gallium/auxiliary/draw/draw_pt.c             |  8 ++---
 src/gallium/drivers/cell/ppu/cell_draw_arrays.c  | 15 +++-----
 src/gallium/drivers/cell/ppu/cell_state_vertex.c |  2 +-
 src/gallium/drivers/i915/i915_context.c          | 22 ++++--------
 src/gallium/drivers/i915/i915_state.c            |  3 +-
 src/gallium/drivers/llvmpipe/lp_draw_arrays.c    | 18 +++-------
 src/gallium/drivers/llvmpipe/lp_state_vertex.c   |  2 +-
 src/gallium/drivers/nvfx/nvfx_draw.c             | 12 +++----
 src/gallium/drivers/nvfx/nvfx_state_emit.c       |  3 ++
 src/gallium/drivers/r300/r300_render.c           | 14 ++------
 src/gallium/drivers/r300/r300_state.c            |  7 +++-
 src/gallium/drivers/softpipe/sp_draw_arrays.c    | 26 ++++----------
 src/gallium/drivers/softpipe/sp_state_vertex.c   |  2 +-
 src/gallium/drivers/svga/svga_swtnl_draw.c       | 17 ++++-----
 src/mesa/state_tracker/st_draw_feedback.c        | 46 +++++++++++++-----------
 17 files changed, 80 insertions(+), 172 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index c2b7a441bd..b39b835f05 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -518,48 +518,6 @@ draw_set_mapped_index_buffer(struct draw_context *draw,
 }
 
 
-/**
- * Tell the drawing context about the index/element buffer to use
- * (ala glDrawElements)
- * If no element buffer is to be used (i.e. glDrawArrays) then this
- * should be called with eltSize=0 and elements=NULL.
- *
- * \param draw  the drawing context
- * \param eltSize  size of each element (1, 2 or 4 bytes)
- * \param elements  the element buffer ptr
- */
-void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
-                                      unsigned eltSize,
-                                      int eltBias,
-                                      unsigned min_index,
-                                      unsigned max_index,
-                                      const void *elements )
-{
-   struct pipe_index_buffer ib;
-
-   memset(&ib, 0, sizeof(ib));
-   ib.index_size = eltSize;
-   draw_set_index_buffer(draw, &ib);
-
-   draw->pt.user.elts = elements;
-   draw->pt.user.eltBias = eltBias;
-   draw->pt.user.min_index = min_index;
-   draw->pt.user.max_index = max_index;
-}
-
-
-void
-draw_set_mapped_element_buffer( struct draw_context *draw,
-                                unsigned eltSize,
-                                int eltBias,
-                                const void *elements )
-{
-   draw_set_mapped_element_buffer_range(draw,
-         eltSize, eltBias, 0, 0xffffffff, elements);
-}
-
- 
 /* Revamp me please:
  */
 void draw_do_flush( struct draw_context *draw, unsigned flags )
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index e9f3237dda..ea55320c42 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -166,19 +166,6 @@ void draw_set_index_buffer(struct draw_context *draw,
 void draw_set_mapped_index_buffer(struct draw_context *draw,
                                   const void *elements);
 
-void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
-                                      unsigned eltSize,
-                                      int eltBias,
-                                      unsigned min_index,
-                                      unsigned max_index,
-                                      const void *elements );
-
-void draw_set_mapped_element_buffer( struct draw_context *draw,
-                                     unsigned eltSize, 
-                                     int eltBias,
-                                     const void *elements );
-
 void draw_set_mapped_vertex_buffer(struct draw_context *draw,
                                    unsigned attr, const void *buffer);
 
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 8db0d73662..f81714d6b4 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -299,7 +299,6 @@ draw_arrays(struct draw_context *draw, unsigned prim,
 
 /**
  * Instanced drawing.
- * draw_set_mapped_element_buffer must be called before calling this function.
  * \sa draw_vbo
  */
 void
@@ -321,9 +320,10 @@ draw_arrays_instanced(struct draw_context *draw,
    info.instance_count = instanceCount;
 
    info.indexed = (draw->pt.user.elts != NULL);
-   info.index_bias = draw->pt.user.eltBias;
-   info.min_index = draw->pt.user.min_index;
-   info.max_index = draw->pt.user.max_index;
+   if (!info.indexed) {
+      info.min_index = start;
+      info.max_index = start + count - 1;
+   }
 
    draw_vbo(draw, &info);
 }
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 4adef5b8c0..a367fa3fe1 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -78,20 +78,13 @@ cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       draw_set_mapped_vertex_buffer(draw, i, buf);
    }
    /* Map index buffer, if present */
-   if (info->indexed && cell->index_buffer.buffer) {
+   if (info->indexed && cell->index_buffer.buffer)
       mapped_indices = cell_resource(cell->index_buffer.buffer)->data;
-      mapped_indices += cell->index_buffer.offset;
-   }
 
-   draw_set_mapped_element_buffer_range(draw, (mapped_indices) ?
-                                        lp->index_buffer.index_size : 0,
-                                        info->index_bias,
-                                        info->min_index,
-                                        info->max_index,
-                                        mapped_indices);
+   draw_set_mapped_index_buffer(draw, mapped_indices);
 
    /* draw! */
-   draw_arrays(draw, info->mode, info->start, info->count);
+   draw_vbo(draw, info);
 
    /*
     * unmap vertex/index buffers - will cause draw module to flush
@@ -100,7 +93,7 @@ cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       draw_set_mapped_vertex_buffer(draw, i, NULL);
    }
    if (mapped_indices) {
-      draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+      draw_set_mapped_index_buffer(draw, NULL);
    }
 
    /*
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index 4e3701cd0a..a065d68b5a 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -102,7 +102,7 @@ cell_set_index_buffer(struct pipe_context *pipe,
    else
       memset(&cell->index_buffer, 0, sizeof(cell->index_buffer));
 
-   /* TODO make this more like a state */
+   draw_set_index_buffer(cell->draw, ib);
 }
 
 
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 2beb9e3091..847dd6dd47 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -66,18 +66,9 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    /*
     * Map index buffer, if present
     */
-   if (info->indexed && i915->index_buffer.buffer) {
-      char *indices = (char *) i915_buffer(i915->index_buffer.buffer)->data;
-      mapped_indices = (void *) (indices + i915->index_buffer.offset);
-   }
-
-   draw_set_mapped_element_buffer_range(draw, (mapped_indices) ?
-                                        i915->index_buffer.index_size : 0,
-                                        info->index_bias,
-                                        info->min_index,
-                                        info->max_index,
-                                        mapped_indices);
-
+   if (info->indexed && i915->index_buffer.buffer)
+      mapped_indices = i915_buffer(i915->index_buffer.buffer)->data;
+   draw_set_mapped_index_buffer(draw, mapped_indices);
 
    draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
                                    i915->current.constants[PIPE_SHADER_VERTEX],
@@ -87,7 +78,7 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    /*
     * Do the drawing
     */
-   draw_arrays(i915->draw, info->mode, info->start, info->count);
+   draw_vbo(i915->draw, info);
 
    /*
     * unmap vertex/index buffers
@@ -96,9 +87,8 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       draw_set_mapped_vertex_buffer(draw, i, NULL);
    }
 
-   if (mapped_indices) {
-      draw_set_mapped_element_buffer(draw, 0, 0, NULL);
-   }
+   if (mapped_indices)
+      draw_set_mapped_index_buffer(draw, NULL);
 }
 
 
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 8c53b06931..bbfcff6bc4 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -817,7 +817,8 @@ static void i915_set_index_buffer(struct pipe_context *pipe,
    else
       memset(&i915->index_buffer, 0, sizeof(i915->index_buffer));
 
-   /* TODO make this more like a state */
+   /* pass-through to draw module */
+   draw_set_index_buffer(i915->draw, ib);
 }
 
 static void
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index e73b431cb4..3af5c8d5c5 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -68,25 +68,17 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    }
 
    /* Map index buffer, if present */
-   if (info->indexed && lp->index_buffer.buffer) {
-      char *indices = (char *) llvmpipe_resource_data(lp->index_buffer.buffer);
-      mapped_indices = (void *) (indices + lp->index_buffer.offset);
-   }
+   if (info->indexed && lp->index_buffer.buffer)
+      mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer);
 
-   draw_set_mapped_element_buffer_range(draw, (mapped_indices) ?
-                                        lp->index_buffer.index_size : 0,
-                                        info->index_bias,
-                                        info->min_index,
-                                        info->max_index,
-                                        mapped_indices);
+   draw_set_mapped_index_buffer(draw, mapped_indices);
 
    llvmpipe_prepare_vertex_sampling(lp,
                                     lp->num_vertex_sampler_views,
                                     lp->vertex_sampler_views);
 
    /* draw! */
-   draw_arrays_instanced(draw, info->mode, info->start, info->count,
-         info->start_instance, info->instance_count);
+   draw_vbo(draw, info);
 
    /*
     * unmap vertex/index buffers
@@ -95,7 +87,7 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       draw_set_mapped_vertex_buffer(draw, i, NULL);
    }
    if (mapped_indices) {
-      draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+      draw_set_mapped_index_buffer(draw, NULL);
    }
    llvmpipe_cleanup_vertex_sampling(lp);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index d86e66b4fb..fb29423dd3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -100,7 +100,7 @@ llvmpipe_set_index_buffer(struct pipe_context *pipe,
    else
       memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer));
 
-   /* TODO make this more like a state */
+   draw_set_index_buffer(llvmpipe->draw, ib);
 }
 
 void
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
index 0b17921295..2601d5b8e2 100644
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -239,12 +239,10 @@ nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info
 		draw_set_mapped_vertex_buffer(nvfx->draw, i, map);
 	}
 
-	if (info->indexed) {
-		map = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
-		draw_set_mapped_element_buffer_range(nvfx->draw, nvfx->idxbuf.index_size, info->index_bias, info->min_index, info->max_index, map);
-	} else {
-		draw_set_mapped_element_buffer(nvfx->draw, 0, 0, NULL);
-	}
+	map = NULL;
+	if (info->indexed && nvfx->idxbuf.buffer)
+		map = nvfx_buffer(nvfx->idxbuf.buffer)->data;
+	draw_set_mapped_index_buffer(nvfx->draw, map);
 
 	if (nvfx->constbuf[PIPE_SHADER_VERTEX]) {
 		const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX];
@@ -254,7 +252,7 @@ nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info
                                                 map, nr);
 	}
 
-	draw_arrays_instanced(nvfx->draw, info->mode, info->start, info->count, info->start_instance, info->instance_count);
+	draw_vbo(nvfx->draw, info);
 
 	draw_flush(nvfx->draw);
 }
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
index cfcb0f7ef6..390bca8cdb 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -335,6 +335,9 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
 		draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe);
 	}
 
+	if (nvfx->draw_dirty & NVFX_NEW_INDEX)
+		draw_set_index_buffer(draw, &nvfx->idxbuf);
+
 	nvfx_state_validate_common(nvfx);
 
 	nvfx->draw_dirty = 0;
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index e08335a105..20bad2c56f 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -680,18 +680,11 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
     if (info->indexed && r300->index_buffer.buffer) {
         indices = pipe_buffer_map(pipe, r300->index_buffer.buffer,
                                   PIPE_TRANSFER_READ, &ib_transfer);
-        if (indices)
-            indices = (void *) ((char *) indices + r300->index_buffer.offset);
     }
 
-    draw_set_mapped_element_buffer_range(r300->draw, (indices) ?
-                                         r300->index_buffer.index_size : 0,
-                                         info->index_bias,
-                                         info->min_index,
-                                         info->max_index,
-                                         indices);
+    draw_set_mapped_index_buffer(r300->draw, indices);
 
-    draw_arrays(r300->draw, info->mode, info->start, count);
+    draw_vbo(r300->draw, info);
 
     /* XXX Not sure whether this is the best fix.
      * It prevents CS from being rejected and weird assertion failures. */
@@ -707,8 +700,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
 
     if (ib_transfer) {
         pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer);
-        draw_set_mapped_element_buffer_range(r300->draw, 0, 0, info->start,
-                info->start + count - 1, NULL);
+        draw_set_mapped_index_buffer(r300->draw, NULL);
     }
 }
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 47e359cd5f..5c225e24f9 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1556,7 +1556,12 @@ static void r300_set_index_buffer(struct pipe_context* pipe,
         memset(&r300->index_buffer, 0, sizeof(r300->index_buffer));
     }
 
-    /* TODO make this more like a state */
+    if (r300->screen->caps.has_tcl) {
+       /* TODO make this more like a state */
+    }
+    else {
+       draw_set_index_buffer(r300->draw, ib);
+    }
 }
 
 /* Initialize the PSC tables. */
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 386c8acb8c..01b4ca985d 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -75,14 +75,10 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode)
    buf = (void*)((int32_t*)buf + offset);
    draw_set_mapped_vertex_buffer(draw, 0, buf);
 
-   draw_set_mapped_element_buffer_range(draw,
-                                        0, 0,
-                                        start,
-                                        start + count - 1,
-                                        NULL);
+   draw_set_mapped_index_buffer(draw, NULL);
 
    /* draw! */
-   draw_arrays_instanced(draw, mode, start, count, 0, 1);
+   draw_arrays(draw, mode, start, count);
 
    /* unmap vertex/index buffers - will cause draw module to flush */
    draw_set_mapped_vertex_buffer(draw, 0, NULL);
@@ -138,28 +134,20 @@ softpipe_draw_vbo(struct pipe_context *pipe,
    }
 
    /* Map index buffer, if present */
-   if (info->indexed && sp->index_buffer.buffer) {
-      char *indices = (char *) softpipe_resource(sp->index_buffer.buffer)->data;
-      mapped_indices = (void *) (indices + sp->index_buffer.offset);
-   }
+   if (info->indexed && sp->index_buffer.buffer)
+      mapped_indices = softpipe_resource(sp->index_buffer.buffer)->data;
 
-   draw_set_mapped_element_buffer_range(draw, (mapped_indices) ?
-                                        sp->index_buffer.index_size : 0,
-                                        info->index_bias,
-                                        info->min_index,
-                                        info->max_index,
-                                        mapped_indices);
+   draw_set_mapped_index_buffer(draw, mapped_indices);
 
    /* draw! */
-   draw_arrays_instanced(draw, info->mode, info->start, info->count,
-         info->start_instance, info->instance_count);
+   draw_vbo(draw, info);
 
    /* unmap vertex/index buffers - will cause draw module to flush */
    for (i = 0; i < sp->num_vertex_buffers; i++) {
       draw_set_mapped_vertex_buffer(draw, i, NULL);
    }
    if (mapped_indices) {
-      draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+      draw_set_mapped_index_buffer(draw, NULL);
    }
 
    /*
diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c
index 880a7c7cd2..b650fcaea5 100644
--- a/src/gallium/drivers/softpipe/sp_state_vertex.c
+++ b/src/gallium/drivers/softpipe/sp_state_vertex.c
@@ -100,5 +100,5 @@ softpipe_set_index_buffer(struct pipe_context *pipe,
    else
       memset(&softpipe->index_buffer, 0, sizeof(softpipe->index_buffer));
 
-   /* TODO make this more like a state */
+   draw_set_index_buffer(softpipe->draw, ib);
 }
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index 4f83822b5c..e9eba3b422 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -71,22 +71,17 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
       draw_set_mapped_vertex_buffer(draw, i, map);
    }
 
+   /* TODO move this to update_swtnl_draw */
+   draw_set_index_buffer(draw, &svga->curr.ib);
+
    /* Map index buffer, if present */
    map = NULL;
    if (info->indexed && svga->curr.ib.buffer) {
       map = pipe_buffer_map(&svga->pipe, svga->curr.ib.buffer,
                             PIPE_TRANSFER_READ,
                             &ib_transfer);
-      if (map)
-         map = (const void *) ((const char *) map + svga->curr.ib.offset);
    }
-
-   draw_set_mapped_element_buffer_range(draw, (map) ?
-                                        svga->curr.ib.index_size : 0,
-                                        info->index_bias,
-                                        info->min_index,
-                                        info->max_index,
-                                        map);
+   draw_set_mapped_index_buffer(draw, map);
 
    if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
       map = pipe_buffer_map(&svga->pipe,
@@ -100,7 +95,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
          svga->curr.cb[PIPE_SHADER_VERTEX]->width0);
    }
 
-   draw_arrays(draw, info->mode, info->start, info->count);
+   draw_vbo(draw, info);
 
    draw_flush(svga->swtnl.draw);
 
@@ -118,7 +113,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
 
    if (ib_transfer) {
       pipe_buffer_unmap(&svga->pipe, svga->curr.ib.buffer, ib_transfer);
-      draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+      draw_set_mapped_index_buffer(draw, NULL);
    }
 
    if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c
index 5cf2666334..e0995f8318 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -40,6 +40,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "util/u_inlines.h"
+#include "util/u_draw.h"
 
 #include "draw/draw_private.h"
 #include "draw/draw_context.h"
@@ -104,14 +105,15 @@ st_feedback_draw_vbo(GLcontext *ctx,
    struct draw_context *draw = st->draw;
    const struct st_vertex_program *vp;
    const struct pipe_shader_state *vs;
-   struct pipe_resource *index_buffer_handle = 0;
    struct pipe_vertex_buffer vbuffers[PIPE_MAX_SHADER_INPUTS];
    struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
+   struct pipe_index_buffer ibuffer;
    struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
    struct pipe_transfer *ib_transfer = NULL;
    struct pipe_transfer *cb_transfer;
    GLuint attr, i;
    ubyte *mapped_constants;
+   const void *mapped_indices = NULL;
 
    assert(draw);
 
@@ -204,17 +206,19 @@ st_feedback_draw_vbo(GLcontext *ctx,
    draw_set_vertex_buffers(draw, vp->num_inputs, vbuffers);
    draw_set_vertex_elements(draw, vp->num_inputs, velements);
 
+   memset(&ibuffer, 0, sizeof(ibuffer));
    if (ib) {
       struct gl_buffer_object *bufobj = ib->obj;
-      unsigned indexSize;
-      void *map;
 
       switch (ib->type) {
       case GL_UNSIGNED_INT:
-         indexSize = 4;
+         ibuffer.index_size = 4;
          break;
       case GL_UNSIGNED_SHORT:
-         indexSize = 2;
+         ibuffer.index_size = 2;
+         break;
+      case GL_UNSIGNED_BYTE:
+         ibuffer.index_size = 1;
          break;
       default:
          assert(0);
@@ -224,23 +228,20 @@ st_feedback_draw_vbo(GLcontext *ctx,
       if (bufobj && bufobj->Name) {
          struct st_buffer_object *stobj = st_buffer_object(bufobj);
 
-         index_buffer_handle = stobj->buffer;
-
-         map = pipe_buffer_map(pipe, index_buffer_handle,
-                               PIPE_TRANSFER_READ, &ib_transfer);
+         pipe_resource_reference(&ibuffer.buffer, stobj->buffer);
+         ibuffer.offset = pointer_to_offset(ib->ptr);
 
-         draw_set_mapped_element_buffer(draw, indexSize, 0, map);
+         mapped_indices = pipe_buffer_map(pipe, stobj->buffer,
+                                          PIPE_TRANSFER_READ, &ib_transfer);
       }
       else {
-         draw_set_mapped_element_buffer(draw, indexSize, 0, (void *) ib->ptr);
-	 ib_transfer = NULL;
+         /* skip setting ibuffer.buffer as the draw module does not use it */
+         mapped_indices = ib->ptr;
       }
-   }
-   else {
-      /* no index/element buffer */
-      draw_set_mapped_element_buffer(draw, 0, 0, NULL);
-   }
 
+      draw_set_index_buffer(draw, &ibuffer);
+      draw_set_mapped_index_buffer(draw, mapped_indices);
+   }
 
    /* map constant buffers */
    mapped_constants = pipe_buffer_map(pipe,
@@ -273,9 +274,14 @@ st_feedback_draw_vbo(GLcontext *ctx,
          draw_set_mapped_vertex_buffer(draw, i, NULL);
       }
    }
-   if (index_buffer_handle) {
-      pipe_buffer_unmap(pipe, index_buffer_handle, ib_transfer);
-      draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+
+   if (ib) {
+      draw_set_mapped_index_buffer(draw, NULL);
+      draw_set_index_buffer(draw, NULL);
+
+      if (ib_transfer)
+         pipe_buffer_unmap(pipe, ibuffer.buffer, ib_transfer);
+      pipe_resource_reference(&ibuffer.buffer, NULL);
    }
 }
 
-- 
cgit v1.2.3


From d29d7807c1e2c53336b1adaf0ecdeb3e35b39969 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sat, 7 Aug 2010 21:08:23 +0800
Subject: draw: Remove UNDEFINED_VERTEX_ID checks in emit paths.

UNDEFINED_VERTEX_ID is used by draw_pipe_vbuf to decide whether a vertex
has been emitted or not.  The non-pipeline paths do not use it (they
tell the frontend the max vertex count when prepare() is called).
---
 src/gallium/auxiliary/draw/draw_pt_emit.c             |  8 --------
 src/gallium/auxiliary/draw/draw_pt_fetch_emit.c       | 11 -----------
 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c |  9 ---------
 3 files changed, 28 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 89d96c4235..c8dfc16911 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -144,11 +144,6 @@ void draw_pt_emit( struct pt_emit *emit,
    if (vertex_count == 0)
       return;
 
-   if (vertex_count >= UNDEFINED_VERTEX_ID) {
-      assert(0);
-      return;
-   }
-
    /* XXX: and work out some way to coordinate the render primitive
     * between vbuf.c and here...
     */
@@ -223,9 +218,6 @@ void draw_pt_emit_linear(struct pt_emit *emit,
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   if (count >= UNDEFINED_VERTEX_ID)
-      goto fail;
-
    /* XXX: and work out some way to coordinate the render primitive
     * between vbuf.c and here...
     */
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 80a89428b6..e706b7796f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -212,11 +212,6 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   if (fetch_count >= UNDEFINED_VERTEX_ID) {
-      assert(0);
-      return;
-   }
-
    draw->render->allocate_vertices( draw->render,
                                     (ushort)feme->translate->key.output_stride,
                                     (ushort)fetch_count );
@@ -276,9 +271,6 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   if (count >= UNDEFINED_VERTEX_ID) 
-      goto fail;
-
    if (!draw->render->allocate_vertices( draw->render,
                                          (ushort)feme->translate->key.output_stride,
                                          (ushort)count )) 
@@ -338,9 +330,6 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   if (count >= UNDEFINED_VERTEX_ID)
-      return FALSE;
-
    if (!draw->render->allocate_vertices( draw->render,
                                          (ushort)feme->translate->key.output_stride,
                                          (ushort)count ))
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index a31d3feb16..4fbf88844a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -199,9 +199,6 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   if (count >= UNDEFINED_VERTEX_ID) 
-      goto fail;
-
    if (!draw->render->allocate_vertices( draw->render,
                                          (ushort)fse->key.output_stride,
                                          (ushort)count ))
@@ -268,9 +265,6 @@ fse_run(struct draw_pt_middle_end *middle,
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   if (fetch_count >= UNDEFINED_VERTEX_ID) 
-      goto fail;
-
    if (!draw->render->allocate_vertices( draw->render,
                                          (ushort)fse->key.output_stride,
                                          (ushort)fetch_count ))
@@ -331,9 +325,6 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   if (count >= UNDEFINED_VERTEX_ID)
-      return FALSE;
-
    if (!draw->render->allocate_vertices( draw->render,
                                          (ushort)fse->key.output_stride,
                                          (ushort)count ))
-- 
cgit v1.2.3


From 4cef3087261317f04e4a06cc645c895d31f6e06b Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 20 Aug 2010 11:40:26 +0100
Subject: util: add rectangle helpers to u_rect.h

This begins a process of repurposing this file.  The existing usage is
as a header file for some software blit fallbacks, which should be
moved to a more appropriately named header.
---
 src/gallium/auxiliary/util/u_rect.h | 57 ++++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index deb00cc80c..4cb90d3c31 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -26,19 +26,68 @@
  **************************************************************************/
 
 
-/**
- * Pipe copy/fill rect helpers.
+#ifndef U_RECT_H
+#define U_RECT_H
+
+#include "pipe/p_compiler.h"
+
+struct u_rect {
+   int x0, x1;   /* lower / upper x bound, as clamped by u_rect_find_intersection() */
+   int y0, y1;   /* lower / upper y bound, same convention */
+};
+
+/* Do two rectangles intersect?  Returns non-zero when a and b overlap.
+ * The comparisons are strict, so a->x1 == b->x0 still counts as overlap. */
+static INLINE boolean
+u_rect_test_intersection(const struct u_rect *a,
+                         const struct u_rect *b)
+{
+   return (!(a->x1 < b->x0 ||   /* a entirely below b in x */
+             b->x1 < a->x0 ||   /* b entirely below a in x */
+             a->y1 < b->y0 ||   /* a entirely below b in y */
+             b->y1 < a->y0));   /* b entirely below a in y */
+}
 
+/* Find the intersection of two rectangles known to intersect.
+ * The result is written back into b; a is only read. */
+static INLINE void
+u_rect_find_intersection(const struct u_rect *a,
+                         struct u_rect *b)
+{
+   /* Caller should verify intersection exists before calling.
+    * Otherwise the clamps below can leave b with x0 > x1 or y0 > y1. */
+   if (b->x0 < a->x0) b->x0 = a->x0;   /* raise lower x bound to a's */
+   if (b->x1 > a->x1) b->x1 = a->x1;   /* lower upper x bound to a's */
+   if (b->y0 < a->y0) b->y0 = a->y0;   /* raise lower y bound to a's */
+   if (b->y1 > a->y1) b->y1 = a->y1;   /* lower upper y bound to a's */
+}
 
-#ifndef U_RECT_H
-#define U_RECT_H
 
+static INLINE void
+u_rect_possible_intersection(const struct u_rect *a,
+                             struct u_rect *b)   /* b is both input and output */
+{
+   if (u_rect_test_intersection(a,b)) {
+      u_rect_find_intersection(a,b);   /* overlap exists: clamp b to a */
+   }
+   else {
+      b->x0 = b->x1 = b->y0 = b->y1 = 0;   /* no overlap: zero all four bounds of b */
+   }
+}
 
 #include "pipe/p_format.h"
 #include "util/u_pack_color.h"
 
 
+
+/**********************************************************************
+ * Pipe copy/fill rect helpers.
+ */
+
+/* These really should move to a different file:
+ */
+#include "pipe/p_format.h"
+
 extern void
 util_copy_rect(ubyte * dst, enum pipe_format format,
                unsigned dst_stride, unsigned dst_x, unsigned dst_y,
-- 
cgit v1.2.3


From 6c0dc4bafbdbdc0cb4b6e5934fe064226dbd47ec Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 20 Aug 2010 15:52:58 +0100
Subject: draw: specialized cliptesting routines

---
 src/gallium/auxiliary/draw/draw_cliptest_tmp.h     | 114 ++++++++
 src/gallium/auxiliary/draw/draw_context.c          |  31 ++-
 src/gallium/auxiliary/draw/draw_context.h          |   3 +-
 src/gallium/auxiliary/draw/draw_pipe_validate.c    |   2 +-
 src/gallium/auxiliary/draw/draw_private.h          |  12 +-
 src/gallium/auxiliary/draw/draw_pt.c               |   4 +-
 src/gallium/auxiliary/draw/draw_pt.h               |   4 +-
 .../auxiliary/draw/draw_pt_fetch_shade_emit.c      |   2 +-
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |   6 +-
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c       |   6 +-
 src/gallium/auxiliary/draw/draw_pt_post_vs.c       | 288 ++++++++-------------
 src/gallium/drivers/r300/r300_context.c            |   2 -
 src/gallium/drivers/svga/svga_swtnl_draw.c         |   3 +-
 13 files changed, 269 insertions(+), 208 deletions(-)
 create mode 100644 src/gallium/auxiliary/draw/draw_cliptest_tmp.h

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
new file mode 100644
index 0000000000..958ed20dc8
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
@@ -0,0 +1,114 @@
+/**************************************************************************
+ * 
+ * Copyright 2010, VMware, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/* Per-vertex clip test over info->count vertices, optionally followed by the
+ * viewport transform and the edgeflag scan; FLAGS selects which steps run. */
+static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
+                                 struct draw_vertex_info *info )
+{
+   struct vertex_header *out = info->verts;
+   const float *scale = pvs->draw->viewport.scale;
+   const float *trans = pvs->draw->viewport.translate;
+   /* const */ float (*plane)[4] = pvs->draw->plane;
+   const unsigned pos = draw_current_shader_position_output(pvs->draw);
+   const unsigned ef = pvs->draw->vs.edgeflag_output;
+   const unsigned nr = pvs->draw->nr_planes;
+   const unsigned flags = (FLAGS);   /* macro supplied by the including file, #undef'd below */
+   unsigned need_pipeline = 0;   /* accumulates clipmask bits and cleared-edgeflag hits */
+   unsigned j;
+
+   for (j = 0; j < info->count; j++) {
+      float *position = out->data[pos];
+      unsigned mask = 0x0;
+  
+      initialize_vertex_header(out);
+
+      if (flags & (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_HALF_Z | DO_CLIP_USER)) {
+         out->clip[0] = position[0];   /* keep a copy of the position in out->clip; */
+         out->clip[1] = position[1];   /* position[] itself may be overwritten by the */
+         out->clip[2] = position[2];   /* viewport mapping further down */
+         out->clip[3] = position[3];
+
+         /* Do the hardwired planes first:
+          */
+         if (flags & DO_CLIP_XY) {
+            if (-position[0] + position[3] < 0) mask |= (1<<0);
+            if ( position[0] + position[3] < 0) mask |= (1<<1);
+            if (-position[1] + position[3] < 0) mask |= (1<<2);
+            if ( position[1] + position[3] < 0) mask |= (1<<3);
+         }
+
+         /* Clip Z planes according to full cube, half cube or none.
+          */
+         if (flags & DO_CLIP_FULL_Z) {
+            if ( position[2] + position[3] < 0) mask |= (1<<4);
+            if (-position[2] + position[3] < 0) mask |= (1<<5);
+         }
+         else if (flags & DO_CLIP_HALF_Z) {
+            if ( position[2]               < 0) mask |= (1<<4);
+            if (-position[2] + position[3] < 0) mask |= (1<<5);
+         }
+
+         if (flags & DO_CLIP_USER) {
+            unsigned i;
+            for (i = 6; i < nr; i++) {   /* bits 0-5 belong to the hardwired tests above */
+               if (dot4(position, plane[i]) < 0) 
+                  mask |= (1<<i);
+            }
+         }
+
+         out->clipmask = mask;
+         need_pipeline |= out->clipmask;
+      }
+
+      if ((flags & DO_VIEWPORT) && mask == 0)   /* vertices with any clip bit set are left unmapped */
+      {
+	 /* divide by w */
+	 float w = 1.0f / position[3];
+
+	 /* Viewport mapping */
+	 position[0] = position[0] * w * scale[0] + trans[0];
+	 position[1] = position[1] * w * scale[1] + trans[1];
+	 position[2] = position[2] * w * scale[2] + trans[2];
+	 position[3] = w;   /* position[3] now holds 1/w, not w */
+      }
+
+      if ((flags & DO_EDGEFLAG) && ef) {   /* ef == 0 skips this; presumably means no edgeflag output */
+         const float *edgeflag = out->data[ef];
+         out->edgeflag = !(edgeflag[0] != 1.0f);   /* 1 only when edgeflag[0] is exactly 1.0f */
+         need_pipeline |= !out->edgeflag;
+      }
+
+      out = (struct vertex_header *)( (char *)out + info->stride );   /* info->stride is in bytes */
+   }
+
+   return need_pipeline != 0;   /* TRUE if any vertex was clipped or had a cleared edgeflag */
+}
+
+
+#undef FLAGS
+#undef TAG
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index b39b835f05..937b093479 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -106,6 +106,8 @@ boolean draw_init(struct draw_context *draw)
    ASSIGN_4V( draw->plane[4],  0,  0,  1, 1 ); /* yes these are correct */
    ASSIGN_4V( draw->plane[5],  0,  0, -1, 1 ); /* mesa's a bit wonky */
    draw->nr_planes = 6;
+   draw->clip_xy = 1;
+   draw->clip_z = 1;
 
 
    draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
@@ -186,6 +188,14 @@ void draw_set_mrd(struct draw_context *draw, double mrd)
 }
 
 
+static void update_clip_flags( struct draw_context *draw )   /* recompute clip_xy / clip_z / clip_user */
+{
+   draw->clip_xy = !draw->driver.bypass_clip_xy;   /* clip x/y unless the driver bypasses it */
+   draw->clip_z = (!draw->driver.bypass_clip_z &&   /* clip z unless the driver bypasses it ... */
+                   !draw->depth_clamp);             /* ... or depth clamp is enabled */
+   draw->clip_user = (draw->nr_planes > 6);   /* user planes are stored from plane[6] up (see draw_set_clip_state) */
+}
+
 /**
  * Register new primitive rasterization/rendering state.
  * This causes the drawing pipeline to be rebuilt.
@@ -200,18 +210,25 @@ void draw_set_rasterizer_state( struct draw_context *draw,
       draw->rasterizer = raster;
       draw->rast_handle = rast_handle;
 
-      draw->bypass_clipping = draw->driver.bypass_clipping;
-   }
+  }
 }
 
-
+/* With a little more work, llvmpipe will be able to turn this off and
+ * do its own x/y clipping.  
+ *
+ * Some hardware can turn off clipping altogether - in particular any
+ * hardware with a TNL unit can do its own clipping, even if it is
+ * relying on the draw module for some other reason.
+ */
 void draw_set_driver_clipping( struct draw_context *draw,
-                               boolean bypass_clipping )
+                               boolean bypass_clip_xy,
+                               boolean bypass_clip_z )
 {
    draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
 
-   draw->driver.bypass_clipping = bypass_clipping;
-   draw->bypass_clipping = draw->driver.bypass_clipping;
+   draw->driver.bypass_clip_xy = bypass_clip_xy;
+   draw->driver.bypass_clip_z = bypass_clip_z;
+   update_clip_flags(draw);
 }
 
 
@@ -241,6 +258,8 @@ void draw_set_clip_state( struct draw_context *draw,
    memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
    draw->nr_planes = 6 + clip->nr;
    draw->depth_clamp = clip->depth_clamp;
+
+   update_clip_flags(draw);
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index ea55320c42..4c780e4dcb 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -212,7 +212,8 @@ void draw_set_render( struct draw_context *draw,
 		      struct vbuf_render *render );
 
 void draw_set_driver_clipping( struct draw_context *draw,
-                               boolean bypass_clipping );
+                               boolean bypass_clip_xy,
+                               boolean bypass_clip_z );
 
 void draw_set_force_passthrough( struct draw_context *draw, 
                                  boolean enable );
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index eafa29276f..8b92543987 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -265,7 +265,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
 
    /* Clip stage
     */
-   if (!draw->bypass_clipping)
+   if (draw->clip_xy || draw->clip_z || draw->clip_user)
    {
       draw->pipeline.clip->next = next;
       next = draw->pipeline.clip;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 7bc3923692..362f563ba6 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -176,13 +176,19 @@ struct draw_context
    } pt;
 
    struct {
-      boolean bypass_clipping;
-      boolean bypass_vs;
+      boolean bypass_clip_xy;
+      boolean bypass_clip_z;
    } driver;
 
    boolean flushing;         /**< debugging/sanity */
    boolean suspend_flushing; /**< internally set */
-   boolean bypass_clipping;  /**< set if either api or driver bypass_clipping true */
+
+   /* Flags set if API requires clipping in these planes and the
+    * driver doesn't indicate that it can do it for us.
+    */
+   boolean clip_xy;
+   boolean clip_z;
+   boolean clip_user;
 
    boolean force_passthrough; /**< never clip or shade */
 
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index f81714d6b4..f44bf2507c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -86,7 +86,9 @@ draw_pt_arrays(struct draw_context *draw,
          opt |= PT_PIPELINE;
       }
 
-      if (!draw->bypass_clipping && !draw->pt.test_fse) {
+      if ((draw->clip_xy ||
+           draw->clip_z ||
+           draw->clip_user) && !draw->pt.test_fse) {
          opt |= PT_CLIPTEST;
       }
 
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 0db5666529..5fbb424291 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -221,7 +221,9 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
 			     struct draw_vertex_info *info );
 
 void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
-			      boolean bypass_clipping,
+			      boolean clip_xy,
+			      boolean clip_z,
+			      boolean clip_user,
 			      boolean bypass_viewport,
 			      boolean opengl,
 			      boolean need_edgeflags );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 4fbf88844a..7c198c6026 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -102,7 +102,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
                                fse->key.nr_inputs);     /* inputs - fetch from api format */
 
    fse->key.viewport = !draw->identity_viewport;
-   fse->key.clip = !draw->bypass_clipping;
+   fse->key.clip = draw->clip_xy || draw->clip_z || draw->clip_user;
    fse->key.const_vbuffers = 0;
 
    memset(fse->key.element, 0, 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 96b40fb363..b72fd61245 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -100,8 +100,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
     * but gl vs dx9 clip spaces.
     */
    draw_pt_post_vs_prepare( fpme->post_vs,
-			    (boolean)draw->bypass_clipping,
-			    (boolean)draw->identity_viewport,
+			    draw->clip_xy,
+			    draw->clip_z,
+			    draw->clip_user,
+			    draw->identity_viewport,
 			    (boolean)draw->rasterizer->gl_rasterization_rules,
 			    (draw->vs.edgeflag_output ? TRUE : FALSE) );
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index cc0b4e5232..77291e304e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -107,8 +107,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
     * but gl vs dx9 clip spaces.
     */
    draw_pt_post_vs_prepare( fpme->post_vs,
-			    (boolean)draw->bypass_clipping,
-			    (boolean)(draw->identity_viewport),
+			    draw->clip_xy,
+			    draw->clip_z,
+			    draw->clip_user,
+			    draw->identity_viewport,
 			    (boolean)draw->rasterizer->gl_rasterization_rules,
 			    (draw->vs.edgeflag_output ? TRUE : FALSE) );
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 308f927b77..769409cfd6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -26,14 +26,26 @@
  **************************************************************************/
 
 #include "util/u_memory.h"
+#include "util/u_math.h"
 #include "pipe/p_context.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_pt.h"
 
+
+#define DO_CLIP_XY           0x1
+#define DO_CLIP_FULL_Z       0x2
+#define DO_CLIP_HALF_Z       0x4
+#define DO_CLIP_USER         0x8
+#define DO_VIEWPORT          0x10
+#define DO_EDGEFLAG          0x20
+
+
 struct pt_post_vs {
    struct draw_context *draw;
 
+   unsigned flags;
+
    boolean (*run)( struct pt_post_vs *pvs,
                    struct draw_vertex_info *info );
 };
@@ -56,186 +68,47 @@ dot4(const float *a, const float *b)
            a[3]*b[3]);
 }
 
-static INLINE unsigned
-compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr,
-                    boolean clip_depth)
-{
-   unsigned mask = 0x0;
-   unsigned i;
+#define FLAGS (0)
+#define TAG(x) x##_none
+#include "draw_cliptest_tmp.h"
 
-#if 0
-   debug_printf("compute clipmask %f %f %f %f\n",
-                clip[0], clip[1], clip[2], clip[3]);
-   assert(clip[3] != 0.0);
-#endif
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_viewport
+#include "draw_cliptest_tmp.h"
 
-   /* Do the hardwired planes first:
-    */
-   if (-clip[0] + clip[3] < 0) mask |= (1<<0);
-   if ( clip[0] + clip[3] < 0) mask |= (1<<1);
-   if (-clip[1] + clip[3] < 0) mask |= (1<<2);
-   if ( clip[1] + clip[3] < 0) mask |= (1<<3);
-   if (clip_depth) {
-      if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
-      if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
-   }
+#define FLAGS (DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_halfz_viewport
+#include "draw_cliptest_tmp.h"
 
-   /* Followed by any remaining ones:
-    */
-   for (i = 6; i < nr; i++) {
-      if (dot4(clip, plane[i]) < 0) 
-         mask |= (1<<i);
-   }
+#define FLAGS (DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_fullz_viewport
+#include "draw_cliptest_tmp.h"
 
-   return mask;
-}
+#define FLAGS (DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_halfz_viewport
+#include "draw_cliptest_tmp.h"
 
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_user_viewport
+#include "draw_cliptest_tmp.h"
 
-/* The normal case - cliptest, rhw divide, viewport transform.
- *
- * Also handle identity viewport here at the expense of a few wasted
- * instructions
- */
-static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
-                                             struct draw_vertex_info *info )
-{
-   struct vertex_header *out = info->verts;
-   const float *scale = pvs->draw->viewport.scale;
-   const float *trans = pvs->draw->viewport.translate;
-   const unsigned pos = draw_current_shader_position_output(pvs->draw);
-   unsigned clipped = 0;
-   unsigned j;
-
-   if (0) debug_printf("%s count, %d\n", __FUNCTION__, info->count);
-
-   for (j = 0; j < info->count; j++) {
-      float *position = out->data[pos];
-
-      initialize_vertex_header(out);
-#if 0
-      debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n",
-                   j, out, position, position[0], position[1], position[2], position[3]);
-#endif
-
-      out->clip[0] = position[0];
-      out->clip[1] = position[1];
-      out->clip[2] = position[2];
-      out->clip[3] = position[3];
-
-      out->vertex_id = 0xffff;
-      /* Disable depth clipping if depth clamping is enabled. */
-      out->clipmask = compute_clipmask_gl(out->clip, 
-					  pvs->draw->plane,
-                                          pvs->draw->nr_planes,
-                                          !pvs->draw->depth_clamp);
-      clipped += out->clipmask;
-
-      if (out->clipmask == 0)
-      {
-	 /* divide by w */
-	 float w = 1.0f / position[3];
-
-	 /* Viewport mapping */
-	 position[0] = position[0] * w * scale[0] + trans[0];
-	 position[1] = position[1] * w * scale[1] + trans[1];
-	 position[2] = position[2] * w * scale[2] + trans[2];
-	 position[3] = w;
-#if 0
-         debug_printf("post viewport: %f %f %f %f\n",
-                      position[0],
-                      position[1],
-                      position[2],
-                      position[3]);
-#endif
-      }
-
-      out = (struct vertex_header *)( (char *)out + info->stride );
-   }
-
-   return clipped != 0;
-}
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT | DO_EDGEFLAG)
+#define TAG(x) x##_xy_fullz_user_viewport_edgeflag
+#include "draw_cliptest_tmp.h"
 
 
 
-/* As above plus edgeflags
+/* Don't want to create 64 versions of this function, so catch the
+ * less common ones here.  This is looking like something which should
+ * be code-generated, perhaps appended to the end of the vertex
+ * shader.
  */
-static boolean
-post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs,
-                                      struct draw_vertex_info *info)
-{
-   unsigned j;
-   boolean needpipe;
-
-   needpipe = post_vs_cliptest_viewport_gl(pvs, info);
-
-   /* If present, copy edgeflag VS output into vertex header.
-    * Otherwise, leave header as is.
-    */
-   if (pvs->draw->vs.edgeflag_output) {
-      struct vertex_header *out = info->verts;
-      int ef = pvs->draw->vs.edgeflag_output;
-
-      for (j = 0; j < info->count; j++) {
-         const float *edgeflag = out->data[ef];
-         out->edgeflag = !(edgeflag[0] != 1.0f);
-         needpipe |= !out->edgeflag;
-         out = (struct vertex_header *)( (char *)out + info->stride );
-      }
-   }
-   return needpipe;
-}
-
+#define FLAGS (pvs->flags)
+#define TAG(x) x##_generic
+#include "draw_cliptest_tmp.h"
 
 
 
-/* If bypass_clipping is set, skip cliptest and rhw divide.
- */
-static boolean post_vs_viewport( struct pt_post_vs *pvs,
-                                 struct draw_vertex_info *info )
-{
-   struct vertex_header *out = info->verts;
-   const float *scale = pvs->draw->viewport.scale;
-   const float *trans = pvs->draw->viewport.translate;
-   const unsigned pos = draw_current_shader_position_output(pvs->draw);
-   unsigned j;
-
-   if (0) debug_printf("%s\n", __FUNCTION__);
-   for (j = 0; j < info->count; j++) {
-      float *position = out->data[pos];
-
-      initialize_vertex_header(out);
-      /* Viewport mapping only, no cliptest/rhw divide
-       */
-      position[0] = position[0] * scale[0] + trans[0];
-      position[1] = position[1] * scale[1] + trans[1];
-      position[2] = position[2] * scale[2] + trans[2];
-
-      out = (struct vertex_header *)((char *)out + info->stride);
-   }
-
-   return FALSE;
-}
-
-
-/* If bypass_clipping is set and we have an identity viewport, nothing
- * to do.
- */
-static boolean post_vs_none( struct pt_post_vs *pvs,
-			     struct draw_vertex_info *info )
-{
-   struct vertex_header *out = info->verts;
-   unsigned j;
-
-   if (0) debug_printf("%s\n", __FUNCTION__);
-   /* just initialize the vertex_id in all headers */
-   for (j = 0; j < info->count; j++) {
-      initialize_vertex_header(out);
-
-      out = (struct vertex_header *)((char *)out + info->stride);
-   }
-   return FALSE;
-}
-
 boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
 			     struct draw_vertex_info *info )
 {
@@ -244,31 +117,72 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
 
 
 void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
-			      boolean bypass_clipping,
+			      boolean clip_xy,
+			      boolean clip_z,
+                              boolean clip_user,
 			      boolean bypass_viewport,
 			      boolean opengl,
 			      boolean need_edgeflags )
 {
-   if (!need_edgeflags) {
-      if (bypass_clipping) {
-         if (bypass_viewport)
-            pvs->run = post_vs_none;
-         else
-            pvs->run = post_vs_viewport;
-      }
-      else {
-         /* if (opengl) */
-         pvs->run = post_vs_cliptest_viewport_gl;
-      }
+   pvs->flags = 0;
+
+   if (clip_xy)
+      pvs->flags |= DO_CLIP_XY;
+   
+   if (clip_z && opengl) {
+      pvs->flags |= DO_CLIP_FULL_Z;
+      ASSIGN_4V( pvs->draw->plane[4],  0,  0,  1, 1 );
+   }
+
+   if (clip_z && !opengl) {
+      pvs->flags |= DO_CLIP_HALF_Z;
+      ASSIGN_4V( pvs->draw->plane[4],  0,  0,  1, 0 );
    }
-   else {
-      /* If we need to copy edgeflags to the vertex header, it should
-       * mean we're running the primitive pipeline.  Hence the bypass
-       * flags should be false.
-       */
-      assert(!bypass_clipping);
-      assert(!bypass_viewport);
-      pvs->run = post_vs_cliptest_viewport_gl_edgeflag;
+
+   if (clip_user)
+      pvs->flags |= DO_CLIP_USER;
+
+   if (!bypass_viewport)
+      pvs->flags |= DO_VIEWPORT;
+
+   if (need_edgeflags)
+      pvs->flags |= DO_EDGEFLAG;
+
+   /* Now select the relevant function:
+    */
+   switch (pvs->flags) {
+   case 0:
+      pvs->run = do_cliptest_none;
+      break;
+
+   case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT:
+      pvs->run = do_cliptest_xy_fullz_viewport;
+      break;
+
+   case DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT:
+      pvs->run = do_cliptest_xy_halfz_viewport;
+      break;
+
+   case DO_CLIP_FULL_Z | DO_VIEWPORT:
+      pvs->run = do_cliptest_fullz_viewport;
+      break;
+
+   case DO_CLIP_HALF_Z | DO_VIEWPORT:
+      pvs->run = do_cliptest_halfz_viewport;
+      break;
+
+   case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT:
+      pvs->run = do_cliptest_xy_fullz_user_viewport;
+      break;
+
+   case (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER |
+         DO_VIEWPORT | DO_EDGEFLAG):
+      pvs->run = do_cliptest_xy_fullz_user_viewport_edgeflag;
+      break;
+      
+   default:
+      pvs->run = do_cliptest_generic;
+      break;
    }
 }
 
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 852d88af54..05f7d09316 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -431,8 +431,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
         r300->draw = draw_create(&r300->context);
         /* Enable our renderer. */
         draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
-        /* Enable Draw's clipping. */
-        draw_set_driver_clipping(r300->draw, FALSE);
         /* Disable converting points/lines to triangles. */
         draw_wide_line_threshold(r300->draw, 10000000.f);
         draw_wide_point_threshold(r300->draw, 10000000.f);
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index e9eba3b422..814e8edd70 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -151,7 +151,8 @@ boolean svga_init_swtnl( struct svga_context *svga )
    draw_install_aapoint_stage(svga->swtnl.draw, &svga->pipe);
    draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe);
 
-   draw_set_driver_clipping(svga->swtnl.draw, debug_get_bool_option("SVGA_SWTNL_FSE", FALSE));
+   if (debug_get_bool_option("SVGA_SWTNL_FSE", FALSE))
+      draw_set_driver_clipping(svga->swtnl.draw, TRUE, TRUE);
 
    return TRUE;
 
-- 
cgit v1.2.3


From 285ea417ef5ee1027d1e8dd03b069cb157105bf7 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 20 Aug 2010 14:51:57 +0100
Subject: tgsi: helper for dumping tokens as hex

---
 src/gallium/auxiliary/tgsi/tgsi_parse.c | 27 ++++++++++++++++-----------
 src/gallium/auxiliary/tgsi/tgsi_parse.h | 11 +++++++++--
 2 files changed, 25 insertions(+), 13 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index db9a342220..1891203abe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -282,17 +282,6 @@ tgsi_parse_token(
 }
 
 
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens)
-{
-   struct tgsi_parse_context ctx;
-   if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) {
-      unsigned len = (ctx.FullHeader.Header.HeaderSize +
-                      ctx.FullHeader.Header.BodySize);
-      return len;
-   }
-   return 0;
-}
 
 
 /**
@@ -319,3 +308,19 @@ tgsi_alloc_tokens(unsigned num_tokens)
    unsigned bytes = num_tokens * sizeof(struct tgsi_token);
    return (struct tgsi_token *) MALLOC(bytes);
 }
+
+
+void
+tgsi_dump_tokens(const struct tgsi_token *tokens)
+{
+   const unsigned *dwords = (const unsigned *)tokens;
+   int nr = tgsi_num_tokens(tokens);
+   int i;
+   
+   assert(sizeof(*tokens) == sizeof(unsigned));
+
+   debug_printf("const unsigned tokens[%d] = {\n", nr);
+   for (i = 0; i < nr; i++)
+      debug_printf("0x%08x,\n", dwords[i]);
+   debug_printf("};\n");
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 36de8807b4..bb2bb0d3d3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -132,8 +132,15 @@ void
 tgsi_parse_token(
    struct tgsi_parse_context *ctx );
 
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens);
+static INLINE unsigned
+tgsi_num_tokens(const struct tgsi_token *tokens)
+{
+   struct tgsi_header header = *(const struct tgsi_header *) tokens;
+   return header.HeaderSize + header.BodySize;
+}
+
+void
+tgsi_dump_tokens(const struct tgsi_token *tokens);
 
 struct tgsi_token *
 tgsi_dup_tokens(const struct tgsi_token *tokens);
-- 
cgit v1.2.3


From c65c86cfe73e8cfd903b33a883266b7e08a71723 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Wed, 25 Aug 2010 22:34:31 -0700
Subject: util: Clean up header file inclusion in u_upload_mgr.h.

Remove p_defines.h.
Remove unnecessary forward declarations.
Add forward declaration for pipe_context.
---
 src/gallium/auxiliary/util/u_upload_mgr.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index a124924fc8..de016df02e 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -32,11 +32,8 @@
 #ifndef U_UPLOAD_MGR_H
 #define U_UPLOAD_MGR_H
 
-#include "pipe/p_defines.h"
-
-struct pipe_screen;
+struct pipe_context;
 struct pipe_resource;
-struct u_upload_mgr;
 
 
 struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
-- 
cgit v1.2.3


From 58cfbd697d2a6ca8d00ce17b2783023bc3256019 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Wed, 25 Aug 2010 22:41:20 -0700
Subject: util: Include missing headers in u_tile.h.

Include p_format.h for enum pipe_format symbol.
Include p_state.h for pipe_box symbol.
---
 src/gallium/auxiliary/util/u_tile.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
index 986eee0743..558351d0ce 100644
--- a/src/gallium/auxiliary/util/u_tile.h
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -29,7 +29,10 @@
 #define P_TILE_H
 
 #include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
 
+struct pipe_context;
 struct pipe_transfer;
 
 /**
-- 
cgit v1.2.3


From cc3e322d967e51a8c0fa794a310a93ee4b684a91 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Wed, 25 Aug 2010 22:50:15 -0700
Subject: util: Include missing headers in u_split_prim.h.

Include p_compiler.h for boolean symbol.
Include u_debug.h for assert symbol.
---
 src/gallium/auxiliary/util/u_split_prim.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
index e63a7c1fad..8af8a7e71d 100644
--- a/src/gallium/auxiliary/util/u_split_prim.h
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -1,5 +1,8 @@
 /* Originally written by Ben Skeggs for the nv50 driver*/
-#include <pipe/p_defines.h>
+#include "pipe/p_defines.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_debug.h"
 
 struct util_split_prim {
    void *priv;
-- 
cgit v1.2.3


From c7111f321ca16f2c72cc59975b728a566daae95a Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Wed, 25 Aug 2010 22:55:15 -0700
Subject: util: Add include guard in u_split_prim.h.

---
 src/gallium/auxiliary/util/u_split_prim.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
index 8af8a7e71d..7f80fc1270 100644
--- a/src/gallium/auxiliary/util/u_split_prim.h
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -1,4 +1,8 @@
 /* Originally written by Ben Skeggs for the nv50 driver*/
+
+#ifndef U_SPLIT_PRIM_H
+#define U_SPLIT_PRIM_H
+
 #include "pipe/p_defines.h"
 #include "pipe/p_compiler.h"
 
@@ -106,3 +110,5 @@ util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
    s->p_start += (max_verts - repeat);
    return FALSE;
 }
+
+#endif /* U_SPLIT_PRIM_H */
-- 
cgit v1.2.3


From d2dd23e85890d697ea6d848f0a3a03fe283edb0a Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Wed, 25 Aug 2010 23:04:39 -0700
Subject: util: Include missing header in u_dirty_surfaces.h.

Include p_state.h for pipe_surface symbol.
---
 src/gallium/auxiliary/util/u_dirty_surfaces.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_dirty_surfaces.h b/src/gallium/auxiliary/util/u_dirty_surfaces.h
index c157300502..fd1bbe5ffd 100644
--- a/src/gallium/auxiliary/util/u_dirty_surfaces.h
+++ b/src/gallium/auxiliary/util/u_dirty_surfaces.h
@@ -27,9 +27,13 @@
 #ifndef U_DIRTY_SURFACES_H_
 #define U_DIRTY_SURFACES_H_
 
+#include "pipe/p_state.h"
+
 #include "util/u_double_list.h"
 #include "util/u_math.h"
 
+struct pipe_context;
+
 typedef void (*util_dirty_surface_flush_t) (struct pipe_context *, struct pipe_surface *);
 
 struct util_dirty_surfaces
-- 
cgit v1.2.3


From 3e41029d6e4e89a52679303d50b7c6b7c1c58f41 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Wed, 25 Aug 2010 23:31:04 -0700
Subject: util: Include missing header in u_bitmask.h.

Include p_compiler.h for boolean symbol.
---
 src/gallium/auxiliary/util/u_bitmask.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_bitmask.h b/src/gallium/auxiliary/util/u_bitmask.h
index 87f1110296..98b85ddecd 100644
--- a/src/gallium/auxiliary/util/u_bitmask.h
+++ b/src/gallium/auxiliary/util/u_bitmask.h
@@ -36,6 +36,9 @@
 #define U_HANDLE_BITMASK_H_
 
 
+#include "pipe/p_compiler.h"
+
+
 #ifdef __cplusplus
 extern "C" {
 #endif
-- 
cgit v1.2.3


From d8ad10dc45d39978a25a300a386440a5cb39a40d Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Wed, 25 Aug 2010 23:37:27 -0700
Subject: util: Include missing header in u_blit.h.

Include p_compiler.h for uint symbol.
Clean up forward declarations.
---
 src/gallium/auxiliary/util/u_blit.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h
index ef95134f32..b8a0dfce13 100644
--- a/src/gallium/auxiliary/util/u_blit.h
+++ b/src/gallium/auxiliary/util/u_blit.h
@@ -30,18 +30,20 @@
 #define U_BLIT_H
 
 
+#include "pipe/p_compiler.h"
+
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
    
+struct cso_context;
 struct pipe_context;
-struct pipe_surface;
 struct pipe_resource;
-struct cso_context;
-
-
-struct blit_state;
+struct pipe_sampler_view;
+struct pipe_subresource;
+struct pipe_surface;
 
 
 extern struct blit_state *
-- 
cgit v1.2.3


From 7822f99193cd26558bff29ff8d6d23db2d3a1048 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Thu, 26 Aug 2010 00:22:19 -0700
Subject: pipebuffer: Clean up header file inclusion in pb_bufmgr.h.

Remove p_compiler.h and p_defines.h.
Include pb_buffer.h for pb_size symbol.
---
 src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index cec2524da2..2ef02160f2 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -50,8 +50,7 @@
 #define PB_BUFMGR_H_
 
 
-#include "pipe/p_compiler.h"
-#include "pipe/p_defines.h"
+#include "pb_buffer.h"
 
 
 #ifdef __cplusplus
-- 
cgit v1.2.3


From b47af6ad6d40773141aeee5bbfbfdffb57dd2bd8 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Thu, 26 Aug 2010 00:29:58 -0700
Subject: rtasm: Include missing header in rtasm_x86sse.h.

Include p_compiler.h for stdint.h uint*_t symbols.
---
 src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index aa77892b2d..2b9678b176 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -24,6 +24,7 @@
 #ifndef _RTASM_X86SSE_H_
 #define _RTASM_X86SSE_H_
 
+#include "pipe/p_compiler.h"
 #include "pipe/p_config.h"
 
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-- 
cgit v1.2.3


From f099e73b7b7ce1ae1aa23713c6418deb86b1a17a Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Thu, 26 Aug 2010 00:34:30 -0700
Subject: tgsi: Include missing header in tgsi_sse2.h.

Include p_compiler.h for boolean symbol.
Clean up forward declarations.
---
 src/gallium/auxiliary/tgsi/tgsi_sse2.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
index d81ee3d00e..00aa8b84fe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
@@ -32,9 +32,12 @@
 extern "C" {
 #endif
 
+#include "pipe/p_compiler.h"
+
+struct tgsi_exec_machine;
+struct tgsi_interp_coef;
 struct tgsi_token;
 struct x86_function;
-struct tgsi_interp_coef;
 
 unsigned
 tgsi_emit_sse2(
-- 
cgit v1.2.3


From 57ce0de8cbdf09d7322e3930c25c0ba5e82ff2a9 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Thu, 26 Aug 2010 01:08:30 -0700
Subject: util: Include missing header in u_simple_shaders.c.

Include p_state.h for PIPE_MAX_COLOR_BUFS symbol.
---
 src/gallium/auxiliary/util/u_simple_shaders.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 5b682f496c..58ef68377f 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -37,6 +37,7 @@
 
 #include "pipe/p_context.h"
 #include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
 #include "util/u_simple_shaders.h"
 #include "util/u_debug.h"
 #include "tgsi/tgsi_ureg.h"
-- 
cgit v1.2.3


From 0b9b8694d9c3295436561331f03f0d59effe26c4 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Fri, 27 Aug 2010 00:26:59 -0700
Subject: tgsi: Include missing header in tgsi_dump.h.

Include p_compiler.h for uint symbol.
---
 src/gallium/auxiliary/tgsi/tgsi_dump.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index 4cd27317b3..dd78b36100 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -28,6 +28,7 @@
 #ifndef TGSI_DUMP_H
 #define TGSI_DUMP_H
 
+#include "pipe/p_compiler.h"
 #include "pipe/p_shader_tokens.h"
 
 #if defined __cplusplus
-- 
cgit v1.2.3


From 57421cb464c63ed65f5e0438ad4c13c527f41118 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Fri, 27 Aug 2010 00:31:27 -0700
Subject: tgsi: Include missing header in tgsi_info.h.

Include p_compiler.h for uint symbol.
---
 src/gallium/auxiliary/tgsi/tgsi_info.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
index 50248884fd..1992d11bbe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -28,6 +28,7 @@
 #ifndef TGSI_INFO_H
 #define TGSI_INFO_H
 
+#include "pipe/p_compiler.h"
 #include "pipe/p_shader_tokens.h"
 
 #if defined __cplusplus
-- 
cgit v1.2.3


From ec21ed1ce7963551d824b8b1f4c4ffa8d6cb3363 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Fri, 27 Aug 2010 00:34:32 -0700
Subject: tgsi: Include missing header in tgsi_parse.h.

Include p_compiler.h for boolean and INLINE symbols.
---
 src/gallium/auxiliary/tgsi/tgsi_parse.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index bb2bb0d3d3..d4df585176 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -28,6 +28,7 @@
 #ifndef TGSI_PARSE_H
 #define TGSI_PARSE_H
 
+#include "pipe/p_compiler.h"
 #include "pipe/p_shader_tokens.h"
 
 #if defined __cplusplus
-- 
cgit v1.2.3


From e826d0e8170028da553d2018b833af7c26b8dc1b Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 26 Aug 2010 20:03:03 +0100
Subject: util: add MIN4, MAX4

---
 src/gallium/auxiliary/util/u_math.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index fe19466436..6ba4e24f4c 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -566,6 +566,9 @@ util_bswap16(uint16_t n)
 #define MIN3( A, B, C ) MIN2( MIN2( A, B ), C )
 #define MAX3( A, B, C ) MAX2( MAX2( A, B ), C )
 
+#define MIN4( A, B, C, D ) MIN2( MIN2( A, B ), MIN2(C, D) )
+#define MAX4( A, B, C, D ) MAX2( MAX2( A, B ), MIN2(C, D) )
+
 
 /**
  * Align a value, only works pot alignemnts.
-- 
cgit v1.2.3


From 04f8560dd826b62e96da5deed43910f767953707 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 27 Aug 2010 13:29:00 +0100
Subject: util: fix typo in MAX4

Thanks to Michal for spotting it.
---
 src/gallium/auxiliary/util/u_math.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 6ba4e24f4c..af510dac51 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -567,7 +567,7 @@ util_bswap16(uint16_t n)
 #define MAX3( A, B, C ) MAX2( MAX2( A, B ), C )
 
 #define MIN4( A, B, C, D ) MIN2( MIN2( A, B ), MIN2(C, D) )
-#define MAX4( A, B, C, D ) MAX2( MAX2( A, B ), MIN2(C, D) )
+#define MAX4( A, B, C, D ) MAX2( MAX2( A, B ), MAX2(C, D) )
 
 
 /**
-- 
cgit v1.2.3


From 9112e531d4c26ea88a31c05fe2bc8cc613b76b65 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 28 Aug 2010 14:18:57 -0700
Subject: draw: Include missing header in draw_vs_llvm.c.

Include p_screen.h for the complete type of pipe_screen.
---
 src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 0014863454..fa9992db78 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -28,6 +28,7 @@
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
+#include "pipe/p_screen.h"
 
 #include "draw_private.h"
 #include "draw_context.h"
-- 
cgit v1.2.3


From 89b2897220acfacdc431f138377fbcec9f0ea812 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 29 Aug 2010 06:03:39 +0200
Subject: util: remove util_is_pot in favor of util_is_power_of_two

The function was duplicated.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c       |  2 +-
 src/gallium/auxiliary/gallivm/lp_bld_debug.c      |  2 +-
 src/gallium/auxiliary/gallivm/lp_bld_format_aos.c |  4 ++--
 src/gallium/auxiliary/gallivm/lp_bld_sample.c     |  6 +++---
 src/gallium/auxiliary/util/u_math.h               | 10 ----------
 src/gallium/drivers/galahad/glhd_screen.c         |  2 +-
 6 files changed, 8 insertions(+), 18 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index bb30e6e9df..7bb57061f5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -538,7 +538,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
    if(b == 2 && bld->type.floating)
       return lp_build_add(bld, a, a);
 
-   if(util_is_pot(b)) {
+   if(util_is_power_of_two(b)) {
       unsigned shift = ffs(b) - 1;
 
       if(bld->type.floating) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
index 39dfc51e50..d3a5afff8c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
@@ -46,7 +46,7 @@
 boolean
 lp_check_alignment(const void *ptr, unsigned alignment)
 {
-   assert(util_is_pot(alignment));
+   assert(util_is_power_of_two(alignment));
    return ((uintptr_t)ptr & (alignment - 1)) == 0;
 }
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 247cb83ce6..92123e09d3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -388,7 +388,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 
    if (format_matches_type(format_desc, type) &&
        format_desc->block.bits <= type.width * 4 &&
-       util_is_pot(format_desc->block.bits)) {
+       util_is_power_of_two(format_desc->block.bits)) {
       LLVMValueRef packed;
 
       /*
@@ -416,7 +416,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
        format_desc->block.width == 1 &&
        format_desc->block.height == 1 &&
-       util_is_pot(format_desc->block.bits) &&
+       util_is_power_of_two(format_desc->block.bits) &&
        format_desc->block.bits <= 32 &&
        format_desc->is_bitmask &&
        !format_desc->is_mixed &&
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 0fd014ab9b..3c4992b25e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -82,9 +82,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
    state->swizzle_a         = view->swizzle_a;
 
    state->target            = texture->target;
-   state->pot_width         = util_is_pot(texture->width0);
-   state->pot_height        = util_is_pot(texture->height0);
-   state->pot_depth         = util_is_pot(texture->depth0);
+   state->pot_width         = util_is_power_of_two(texture->width0);
+   state->pot_height        = util_is_power_of_two(texture->height0);
+   state->pot_depth         = util_is_power_of_two(texture->depth0);
 
    state->wrap_s            = sampler->wrap_s;
    state->wrap_t            = sampler->wrap_t;
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index af510dac51..69a7681494 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -360,16 +360,6 @@ util_is_inf_or_nan(float x)
 }
 
 
-/**
- * Test whether x is a power of two.
- */
-static INLINE boolean
-util_is_pot(unsigned x)
-{
-   return (x & (x - 1)) == 0;
-}
-
-
 /**
  * Find first bit set in word.  Least significant bit is 1.
  * Return 0 if no bits set.
diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c
index a4eac11ae3..75e4c2d82e 100644
--- a/src/gallium/drivers/galahad/glhd_screen.c
+++ b/src/gallium/drivers/galahad/glhd_screen.c
@@ -140,7 +140,7 @@ galahad_screen_resource_create(struct pipe_screen *_screen,
 
    if(templat->target != PIPE_TEXTURE_RECT && templat->target != PIPE_BUFFER && !screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES))
    {
-      if(!util_is_pot(templat->width0) || !util_is_pot(templat->height0))
+      if(!util_is_power_of_two(templat->width0) || !util_is_power_of_two(templat->height0))
          glhd_warn("Requested NPOT (%ux%u) non-rectangle texture without NPOT support", templat->width0, templat->height0);
    }
 
-- 
cgit v1.2.3


From a922725118333e016a357008f37105c23c6f54bc Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 29 Aug 2010 06:08:24 +0200
Subject: r300g,u_blitter: use u_framebuffer

Removing another function duplication in u_blitter.
---
 src/gallium/auxiliary/util/u_blitter.c  |  2 +-
 src/gallium/auxiliary/util/u_blitter.h  | 36 ++-------------------------------
 src/gallium/drivers/r300/r300_context.c |  2 +-
 src/gallium/drivers/r300/r300_state.c   |  4 ++--
 4 files changed, 6 insertions(+), 38 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 8f93dac011..f93ef26ae7 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -320,7 +320,7 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
     */
    if (ctx->base.saved_fb_state.nr_cbufs != ~0) {
       pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
-      util_assign_framebuffer_state(&ctx->base.saved_fb_state, NULL);
+      util_unreference_framebuffer_state(&ctx->base.saved_fb_state);
       ctx->base.saved_fb_state.nr_cbufs = ~0;
    }
 
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index f316587dea..e33d2e283f 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -27,6 +27,7 @@
 #ifndef U_BLITTER_H
 #define U_BLITTER_H
 
+#include "util/u_framebuffer.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 
@@ -258,45 +259,12 @@ void util_blitter_save_vertex_shader(struct blitter_context *blitter,
    blitter->saved_vs = vs;
 }
 
-/* XXX This should probably be moved elsewhere. */
-static INLINE
-void util_assign_framebuffer_state(struct pipe_framebuffer_state *dst,
-                                   const struct pipe_framebuffer_state *src)
-{
-   unsigned i;
-
-   if (src) {
-      /* Reference all surfaces. */
-      for (i = 0; i < src->nr_cbufs; i++) {
-         pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
-      }
-      for (; i < dst->nr_cbufs; i++) {
-         pipe_surface_reference(&dst->cbufs[i], NULL);
-      }
-
-      pipe_surface_reference(&dst->zsbuf, src->zsbuf);
-
-      dst->nr_cbufs = src->nr_cbufs;
-      dst->width = src->width;
-      dst->height = src->height;
-   } else {
-      /* Set all surfaces to NULL. */
-      for (i = 0; i < dst->nr_cbufs; i++) {
-         pipe_surface_reference(&dst->cbufs[i], NULL);
-      }
-
-      pipe_surface_reference(&dst->zsbuf, NULL);
-
-      dst->nr_cbufs = 0;
-   }
-}
-
 static INLINE
 void util_blitter_save_framebuffer(struct blitter_context *blitter,
                                    const struct pipe_framebuffer_state *state)
 {
    blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
-   util_assign_framebuffer_state(&blitter->saved_fb_state, state);
+   util_copy_framebuffer_state(&blitter->saved_fb_state, state);
 }
 
 static INLINE
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 05f7d09316..624dadd07d 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -65,7 +65,7 @@ static void r300_release_referenced_objects(struct r300_context *r300)
     unsigned i;
 
     /* Framebuffer state. */
-    util_assign_framebuffer_state(fb, NULL);
+    util_unreference_framebuffer_state(fb);
 
     /* Textures. */
     for (i = 0; i < textures->sampler_view_count; i++)
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 9adaea3235..8ccb63964e 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -23,7 +23,7 @@
 
 #include "draw/draw_context.h"
 
-#include "util/u_blitter.h"
+#include "util/u_framebuffer.h"
 #include "util/u_math.h"
 #include "util/u_mm.h"
 #include "util/u_memory.h"
@@ -748,7 +748,7 @@ static void
     /* The tiling flags are dependent on the surface miplevel, unfortunately. */
     r300_fb_set_tiling_flags(r300, state);
 
-    util_assign_framebuffer_state(r300->fb_state.state, state);
+    util_copy_framebuffer_state(r300->fb_state.state, state);
 
     r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
 
-- 
cgit v1.2.3


From 1f3e6e9726e3b41f4deeacbb34b9e23c5b3d6f76 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 28 Aug 2010 21:42:28 -0700
Subject: util: Add forward declaration in u_transfer.h.

---
 src/gallium/auxiliary/util/u_transfer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h
index eb07945d15..e3a38730f2 100644
--- a/src/gallium/auxiliary/util/u_transfer.h
+++ b/src/gallium/auxiliary/util/u_transfer.h
@@ -8,6 +8,7 @@
 #include "pipe/p_state.h"
 
 struct pipe_context;
+struct winsys_handle;
 
 boolean u_default_resource_get_handle(struct pipe_screen *screen,
 				      struct pipe_resource *resource,
-- 
cgit v1.2.3


From b812ff8f9e5c9d292c0fb9518df1d35165542556 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 28 Aug 2010 21:46:41 -0700
Subject: util: Include missing header in u_draw.h.

Include p_state.h for the complete type of pipe_draw_info.
---
 src/gallium/auxiliary/util/u_draw.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h
index 2a91ea0f9a..f06d09ef91 100644
--- a/src/gallium/auxiliary/util/u_draw.h
+++ b/src/gallium/auxiliary/util/u_draw.h
@@ -31,6 +31,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_context.h"
+#include "pipe/p_state.h"
 
 
 static INLINE void
-- 
cgit v1.2.3


From e18c7f68b4a18ba3f9ebfd0a4a24e3528cf44800 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 21 Aug 2010 11:31:22 +0100
Subject: gallivm: Fix lp_build_sum_vector.

The result is scalar, so when argument is zero/undef we can pass vector
zero/undef.

Also, support the scalar case.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 7bb57061f5..e0d30be98d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -259,7 +259,7 @@ lp_build_add(struct lp_build_context *bld,
 }
 
 
-/** Return the sum of the elements of a */
+/** Return the scalar sum of the elements of a */
 LLVMValueRef
 lp_build_sum_vector(struct lp_build_context *bld,
                     LLVMValueRef a)
@@ -270,11 +270,9 @@ lp_build_sum_vector(struct lp_build_context *bld,
 
    assert(lp_check_value(type, a));
 
-   if (a == bld->zero)
-      return bld->zero;
-   if (a == bld->undef)
-      return bld->undef;
-   assert(type.length > 1);
+   if (type.length == 1) {
+      return a;
+   }
 
    assert(!bld->type.norm);
 
-- 
cgit v1.2.3


From 7a08dbcf55d4c959907086a5e4851e0cab0b9f67 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 21 Aug 2010 11:29:41 +0100
Subject: gallivm: Correct copy'n'pasted comments.

---
 src/gallium/auxiliary/gallivm/lp_bld_type.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 3ffe916f8e..fec1d3dfbc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -128,16 +128,16 @@ struct lp_build_context
     */
    struct lp_type type;
 
-   /** Same as lp_build_undef(type) */
+   /** Same as lp_build_elem_type(type) */
    LLVMTypeRef elem_type;
 
-   /** Same as lp_build_undef(type) */
+   /** Same as lp_build_vec_type(type) */
    LLVMTypeRef vec_type;
 
-   /** Same as lp_build_undef(type) */
+   /** Same as lp_build_int_elem_type(type) */
    LLVMTypeRef int_elem_type;
 
-   /** Same as lp_build_undef(type) */
+   /** Same as lp_build_int_vec_type(type) */
    LLVMTypeRef int_vec_type;
 
    /** Same as lp_build_undef(type) */
-- 
cgit v1.2.3


From e4c3e7f9d8eae05c83f6e1fc54dc63ded3c12d12 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 29 Aug 2010 12:05:36 +0100
Subject: gallivm: Disable LLVM's pretty stack trace dumper.

By default LLVM adds a signal handler to output a pretty stack trace.
This signal handler is never removed, causing problems when unloading
the shared object where the gallium driver resides.

Thanks to Chris Li for finding this.
---
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 92f9adfc18..48baf7c425 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -40,6 +40,7 @@
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
 #include <llvm/ExecutionEngine/JITEventListener.h>
 #include <llvm/Support/CommandLine.h>
+#include <llvm/Support/PrettyStackTrace.h>
 
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
@@ -161,6 +162,13 @@ lp_set_target_options(void)
       llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
       first = FALSE;
    }
+
+   /*
+    * By default LLVM adds a signal handler to output a pretty stack trace.
+    * This signal handler is never removed, causing problems when unloading the
+    * shared object where the gallium driver resides.
+    */
+   llvm::DisablePrettyStackTrace = true;
 }
 
 
-- 
cgit v1.2.3


From 0a6c908e0d2d1721421f7b26d73975f4f61e24a2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 7 Jun 2010 12:05:18 +0100
Subject: gallivm: Compute the 4 texel offsets for linear filtering en
 ensemble.

---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c     |  97 ++++---
 src/gallium/auxiliary/gallivm/lp_bld_sample.h     |   9 +
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 300 +++++++++++++++-------
 3 files changed, 280 insertions(+), 126 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 3c4992b25e..259b1142e3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -123,6 +123,52 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
 }
 
 
+/**
+ * Compute the partial offset of a pixel block along an arbitrary axis.
+ *
+ * @param coord   coordinate in pixels
+ * @param stride  number of bytes between rows of successive pixel blocks
+ * @param block_length  number of pixels in a pixels block along the coordinate
+ *                      axis
+ * @param out_offset    resulting relative offset of the pixel block in bytes
+ * @param out_subcoord  resulting sub-block pixel coordinate
+ */
+void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+                               unsigned block_length,
+                               LLVMValueRef coord,
+                               LLVMValueRef stride,
+                               LLVMValueRef *out_offset,
+                               LLVMValueRef *out_subcoord)
+{
+   LLVMValueRef offset;
+   LLVMValueRef subcoord;
+
+   if (block_length == 1) {
+      subcoord = bld->zero;
+   }
+   else {
+      /*
+       * Pixel blocks have power of two dimensions. LLVM should convert the
+       * rem/div to bit arithmetic.
+       * TODO: Verify this.
+       */
+
+      LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
+      subcoord = LLVMBuildURem(bld->builder, coord, block_width, "");
+      coord    = LLVMBuildUDiv(bld->builder, coord, block_width, "");
+   }
+
+   offset = lp_build_mul(bld, coord, stride);
+
+   assert(out_offset);
+   assert(out_subcoord);
+
+   *out_offset = offset;
+   *out_subcoord = subcoord;
+}
+
+
 /**
  * Compute the offset of a pixel block.
  *
@@ -144,48 +190,35 @@ lp_build_sample_offset(struct lp_build_context *bld,
 {
    LLVMValueRef x_stride;
    LLVMValueRef offset;
-   LLVMValueRef i;
-   LLVMValueRef j;
-
-   /*
-    * Describe the coordinates in terms of pixel blocks.
-    *
-    * TODO: pixel blocks are power of two. LLVM should convert rem/div to
-    * bit arithmetic. Verify this.
-    */
-
-   if (format_desc->block.width == 1) {
-      i = bld->zero;
-   }
-   else {
-      LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width);
-      i = LLVMBuildURem(bld->builder, x, block_width, "");
-      x = LLVMBuildUDiv(bld->builder, x, block_width, "");
-   }
-
-   if (format_desc->block.height == 1) {
-      j = bld->zero;
-   }
-   else {
-      LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height);
-      j = LLVMBuildURem(bld->builder, y, block_height, "");
-      y = LLVMBuildUDiv(bld->builder, y, block_height, "");
-   }
 
    x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
-   offset = lp_build_mul(bld, x, x_stride);
+
+   lp_build_sample_partial_offset(bld,
+                                  format_desc->block.width,
+                                  x, x_stride,
+                                  &offset, out_i);
 
    if (y && y_stride) {
-      LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
+      LLVMValueRef y_offset;
+      lp_build_sample_partial_offset(bld,
+                                     format_desc->block.height,
+                                     y, y_stride,
+                                     &y_offset, out_j);
       offset = lp_build_add(bld, offset, y_offset);
    }
+   else {
+      *out_j = bld->zero;
+   }
 
    if (z && z_stride) {
-      LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
+      LLVMValueRef z_offset;
+      LLVMValueRef k;
+      lp_build_sample_partial_offset(bld,
+                                     1, /* pixel blocks are always 2D */
+                                     z, z_stride,
+                                     &z_offset, &k);
       offset = lp_build_add(bld, offset, z_offset);
    }
 
    *out_offset = offset;
-   *out_i = i;
-   *out_j = j;
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index aff7bb2a4d..caafc4eca0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -148,6 +148,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
                         const struct pipe_sampler_state *sampler);
 
 
+void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+                               unsigned block_length,
+                               LLVMValueRef coord,
+                               LLVMValueRef stride,
+                               LLVMValueRef *out_offset,
+                               LLVMValueRef *out_i);
+
+
 void
 lp_build_sample_offset(struct lp_build_context *bld,
                        const struct util_format_description *format_desc,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index f6b6162f63..1f39d9c98b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -322,59 +322,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 }
 
 
-/**
- * Fetch the texels as <4n x i8> in AoS form.
- */
-static LLVMValueRef
-lp_build_sample_packed(struct lp_build_sample_context *bld,
-                       LLVMValueRef x,
-                       LLVMValueRef y,
-                       LLVMValueRef y_stride,
-                       LLVMValueRef data_array)
-{
-   LLVMValueRef offset, i, j;
-   LLVMValueRef data_ptr;
-   LLVMValueRef res;
-
-   /* convert x,y,z coords to linear offset from start of texture, in bytes */
-   lp_build_sample_offset(&bld->uint_coord_bld,
-                          bld->format_desc,
-                          x, y, NULL, y_stride, NULL,
-                          &offset, &i, &j);
-
-   /* get pointer to mipmap level 0 data */
-   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
-
-   if (util_format_is_rgba8_variant(bld->format_desc)) {
-      /* Just fetch the data directly without swizzling */
-      assert(bld->format_desc->block.width == 1);
-      assert(bld->format_desc->block.height == 1);
-      assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
-      res = lp_build_gather(bld->builder,
-                            bld->texel_type.length,
-                            bld->format_desc->block.bits,
-                            bld->texel_type.width,
-                            data_ptr, offset);
-   }
-   else {
-      struct lp_type type;
-
-      assert(bld->texel_type.width == 32);
-
-      memset(&type, 0, sizeof type);
-      type.width = 8;
-      type.length = bld->texel_type.length*4;
-      type.norm = TRUE;
-
-      res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
-                                    data_ptr, offset, i, j);
-   }
-
-   return res;
-}
-
-
 /**
  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
  */
@@ -409,7 +356,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
 
 
 /**
- * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
+ * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
  * Return whether the given mode is supported by that function.
  */
 static boolean
@@ -431,13 +378,18 @@ is_simple_wrap_mode(unsigned mode)
  * \param length  the texture size along one dimension
  * \param is_pot  if TRUE, length is a power of two
  * \param wrap_mode  one of PIPE_TEX_WRAP_x
+ * \param i0  resulting sub-block pixel coordinate for coord0
  */
-static LLVMValueRef
-lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
-                         LLVMValueRef coord,
-                         LLVMValueRef length,
-                         boolean is_pot,
-                         unsigned wrap_mode)
+static void
+lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
+                                 unsigned block_length,
+                                 LLVMValueRef coord,
+                                 LLVMValueRef length,
+                                 LLVMValueRef stride,
+                                 boolean is_pot,
+                                 unsigned wrap_mode,
+                                 LLVMValueRef *out_offset,
+                                 LLVMValueRef *out_i)
 {
    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
@@ -470,7 +422,134 @@ lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
       assert(0);
    }
 
-   return coord;
+   lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+                                  out_offset, out_i);
+}
+
+
+/**
+ * Build LLVM code for texture wrap mode, for scaled integer texcoords.
+ * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length  the texture size along one dimension
+ * \param stride  pixel stride along the coordinate axis
+ * \param block_length  is the length of the pixel block along the
+ *                      coordinate axis
+ * \param is_pot  if TRUE, length is a power of two
+ * \param wrap_mode  one of PIPE_TEX_WRAP_x
+ * \param offset0  resulting relative offset for coord0
+ * \param offset1  resulting relative offset for coord0 + 1
+ * \param i0  resulting sub-block pixel coordinate for coord0
+ * \param i1  resulting sub-block pixel coordinate for coord0 + 1
+ */
+static void
+lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
+                                unsigned block_length,
+                                LLVMValueRef coord0,
+                                LLVMValueRef length,
+                                LLVMValueRef stride,
+                                boolean is_pot,
+                                unsigned wrap_mode,
+                                LLVMValueRef *offset0,
+                                LLVMValueRef *offset1,
+                                LLVMValueRef *i0,
+                                LLVMValueRef *i1)
+{
+   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   LLVMValueRef length_minus_one;
+   LLVMValueRef lmask, umask, mask;
+
+   if (block_length != 1) {
+      /*
+       * If the pixel block covers more than one pixel then there is no easy
+       * way to calculate offset1 relative to offset0. Instead, compute them
+       * independently.
+       */
+
+      LLVMValueRef coord1;
+
+      lp_build_sample_wrap_nearest_int(bld,
+                                       block_length,
+                                       coord0,
+                                       length,
+                                       stride,
+                                       is_pot,
+                                       wrap_mode,
+                                       offset0, i0);
+
+      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+
+      lp_build_sample_wrap_nearest_int(bld,
+                                       block_length,
+                                       coord1,
+                                       length,
+                                       stride,
+                                       is_pot,
+                                       wrap_mode,
+                                       offset1, i1);
+
+      return;
+   }
+
+   /*
+    * Scalar pixels -- try to compute offset0 and offset1 with a single stride
+    * multiplication.
+    */
+
+   *i0 = uint_coord_bld->zero;
+   *i1 = uint_coord_bld->zero;
+
+   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+   switch(wrap_mode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      if (is_pot) {
+         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
+      }
+      else {
+         /* Signed remainder won't give the right results for negative
+          * dividends but unsigned remainder does.*/
+         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
+      }
+
+      mask = lp_build_compare(bld->builder, int_coord_bld->type,
+                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
+
+      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+      *offset1 = LLVMBuildAnd(bld->builder,
+                              lp_build_add(uint_coord_bld, *offset0, stride),
+                              mask, "");
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
+      umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+                               PIPE_FUNC_LESS, coord0, length_minus_one);
+
+      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
+      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
+
+      mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
+
+      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+      *offset1 = lp_build_add(uint_coord_bld,
+                              *offset0,
+                              LLVMBuildAnd(bld->builder, stride, mask, ""));
+      break;
+
+   case PIPE_TEX_WRAP_CLAMP:
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+   default:
+      assert(0);
+      *offset0 = uint_coord_bld->zero;
+      *offset1 = uint_coord_bld->zero;
+      break;
+   }
 }
 
 
@@ -1741,14 +1820,18 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMValueRef i32_c8, i32_c128, i32_c255;
    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
-   LLVMValueRef x0, x1;
-   LLVMValueRef y0, y1;
-   LLVMValueRef neighbors[2][2];
+   LLVMValueRef data_ptr;
+   LLVMValueRef x_stride, y_stride;
+   LLVMValueRef x_offset0, x_offset1;
+   LLVMValueRef y_offset0, y_offset1;
+   LLVMValueRef offset[2][2];
+   LLVMValueRef x_subcoord[2], y_subcoord[2];
    LLVMValueRef neighbors_lo[2][2];
    LLVMValueRef neighbors_hi[2][2];
    LLVMValueRef packed, packed_lo, packed_hi;
    LLVMValueRef unswizzled[4];
-   LLVMValueRef stride;
+   const unsigned level = 0;
+   unsigned i, j;
 
    assert(bld->static_state->target == PIPE_TEXTURE_2D
          || bld->static_state->target == PIPE_TEXTURE_RECT);
@@ -1795,21 +1878,30 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
 
-   x0 = s_ipart;
-   y0 = t_ipart;
-
-   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
-   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
-
-   x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
-                                 bld->static_state->wrap_s);
-   y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
-                                 bld->static_state->wrap_t);
-
-   x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
-                                 bld->static_state->wrap_s);
-   y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
-                                 bld->static_state->wrap_t);
+   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+                                 bld->format_desc->block.bits/8);
+
+   y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);
+
+   lp_build_sample_wrap_linear_int(bld,
+                                   bld->format_desc->block.width,
+                                   s_ipart, width, x_stride,
+                                   bld->static_state->pot_width,
+                                   bld->static_state->wrap_s,
+                                   &x_offset0, &x_offset1,
+                                   &x_subcoord[0], &x_subcoord[1]);
+   lp_build_sample_wrap_linear_int(bld,
+                                   bld->format_desc->block.height,
+                                   t_ipart, height, y_stride,
+                                   bld->static_state->pot_height,
+                                   bld->static_state->wrap_t,
+                                   &y_offset0, &y_offset1,
+                                   &y_subcoord[0], &y_subcoord[1]);
+
+   offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
+   offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
+   offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
+   offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);
 
    /*
     * Transform 4 x i32 in
@@ -1838,7 +1930,6 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
       LLVMValueRef shuffle_lo;
       LLVMValueRef shuffle_hi;
-      unsigned i, j;
 
       for(j = 0; j < h16.type.length; j += 4) {
 #ifdef PIPE_ARCH_LITTLE_ENDIAN
@@ -1866,7 +1957,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
       t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
    }
 
-   stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
+   /*
+    * get pointer to mipmap level 0 data
+    */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);
 
    /*
     * Fetch the pixels as 4 x 32bit (rgba order might differ):
@@ -1885,20 +1979,38 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
     * The higher 8 bits of the resulting elements will be zero.
     */
 
-   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
-   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
-   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
-   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
+   for (j = 0; j < 2; ++j) {
+      for (i = 0; i < 2; ++i) {
+         LLVMValueRef rgba8;
 
-   neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
-   neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
-   neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
-   neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+         if (util_format_is_rgba8_variant(bld->format_desc)) {
+            /*
+             * Given the format is a rgba8, just read the pixels as is,
+             * without any swizzling. Swizzling will be done later.
+             */
+            rgba8 = lp_build_gather(bld->builder,
+                                    bld->texel_type.length,
+                                    bld->format_desc->block.bits,
+                                    bld->texel_type.width,
+                                    data_ptr, offset[j][i]);
 
-   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
-   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
-   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
-   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+            rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+
+         }
+         else {
+            rgba8 = lp_build_fetch_rgba_aos(bld->builder,
+                                            bld->format_desc,
+                                            u8n.type,
+                                            data_ptr, offset[j][i],
+                                            x_subcoord[i],
+                                            y_subcoord[j]);
+         }
+
+         lp_build_unpack2(builder, u8n.type, h16.type,
+                          rgba8,
+                          &neighbors_lo[j][i], &neighbors_hi[j][i]);
+      }
+   }
 
    /*
     * Linear interpolate with 8.8 fixed point.
-- 
cgit v1.2.3


From 3fa3c33844b8491a204cda6ae8d67cd6ada78b3b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 31 Aug 2010 19:14:18 -0600
Subject: gallivm: fix bug in nested conditionals

This, plus the previous commit, fixes fd.o bug 29806.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index ca8db9ce01..0e07f7f3f3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -200,8 +200,10 @@ static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
    }
    mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
    assert(LLVMTypeOf(val) == mask->int_vec_type);
-   mask->cond_mask = val;
-
+   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
+                                  mask->cond_mask,
+                                  val,
+                                  "");
    lp_exec_mask_update(mask);
 }
 
-- 
cgit v1.2.3