Code reorganization: move files into their places.

This is in a separate commit to ensure renames are properly preserved.
author: José Fonseca <jrfonseca@tungstengraphics.com> 2008-02-15 17:35:24 +0900
committer: José Fonseca <jrfonseca@tungstengraphics.com> 2008-02-15 17:45:40 +0900
commit: b642730be93149baa7556e5791393168ab396175 (patch)
tree: 67d174be5d4814261b93bc8b20c027a26d1c3d06 /src/gallium/drivers/softpipe/sp_quad_fs.c
parent: 4593be34b2a6e494f0e476c8aa8e1d2633fffd47 (diff)
1 files changed, 390 insertions, 0 deletions
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
new file mode 100644
index 0000000000..3316858413
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -0,0 +1,390 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/* Vertices are just an array of floats, with all the attributes
+ * packed.  We currently assume a layout like:
+ *
+ * attr[0][0..3] - window position
+ * attr[1..n][0..3] - remaining attributes.
+ *
+ * Attributes are assumed to be 4 floats wide but are packed so that
+ * all the enabled attributes run contiguously.
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "x86/rtasm/x86sse.h"
+
+#ifdef MESA_LLVM
+#include "pipe/llvm/gallivm.h"
+#endif
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+#include "sp_texture.h"
+#include "sp_tex_sample.h"
+
+
+struct quad_shade_stage
+{
+   struct quad_stage stage;
+   struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
+   struct tgsi_exec_machine machine;
+   struct tgsi_exec_vector *inputs, *outputs;
+   int colorOutSlot, depthOutSlot;
+#ifdef MESA_LLVM
+   struct gallivm_prog *llvm_prog;
+#endif
+};
+
+
+/** cast wrapper */
+static INLINE struct quad_shade_stage *
+quad_shade_stage(struct quad_stage *qs)
+{
+   return (struct quad_shade_stage *) qs;
+}
+
+
+/**
+ * Compute quad X,Y,Z,W for the four fragments in a quad.
+ * Note that we only need to "compute" X and Y for the upper-left fragment.
+ * We could do less work if we're not depth testing, or there's no
+ * perspective-corrected attributes, but that's seldom.
+ */
+static void
+setup_pos_vector(const struct tgsi_interp_coef *coef,
+                 float x, float y,
+                 struct tgsi_exec_vector *quadpos)
+{
+   uint chan;
+   /* do X */
+   quadpos->xyzw[0].f[0] = x;
+   /* do Y */
+   quadpos->xyzw[1].f[0] = y;
+   /* do Z and W for all fragments in the quad */
+   for (chan = 2; chan < 4; chan++) {
+      const float dadx = coef->dadx[chan];
+      const float dady = coef->dady[chan];
+      const float a0 = coef->a0[chan] + dadx * x + dady * y;
+      quadpos->xyzw[chan].f[0] = a0;
+      quadpos->xyzw[chan].f[1] = a0 + dadx;
+      quadpos->xyzw[chan].f[2] = a0 + dady;
+      quadpos->xyzw[chan].f[3] = a0 + dadx + dady;
+   }
+}
+
+
+typedef void (XSTDCALL *codegen_function)(
+   const struct tgsi_exec_vector *input,
+   struct tgsi_exec_vector *output,
+   float (*constant)[4],
+   struct tgsi_exec_vector *temporary,
+   const struct tgsi_interp_coef *coef
+#if 0
+   ,const struct tgsi_exec_vector *quadPos
+#endif
+ );
+
+
+/**
+ * Execute fragment shader for the four fragments in the quad.
+ */
+static void
+shade_quad(
+   struct quad_stage *qs,
+   struct quad_header *quad )
+{
+   struct quad_shade_stage *qss = quad_shade_stage( qs );
+   struct softpipe_context *softpipe = qs->softpipe;
+   struct tgsi_exec_machine *machine = &qss->machine;
+
+   /* Consts do not require 16 byte alignment. */
+   machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
+
+   machine->InterpCoefs = quad->coef;
+
+   /* Compute X, Y, Z, W vals for this quad */
+   setup_pos_vector(quad->posCoef, (float) quad->x0, (float) quad->y0, &machine->QuadPos);
+
+   /* run shader */
+#if defined(__i386__) || defined(__386__)
+   if( softpipe->use_sse ) {
+      codegen_function func = (codegen_function) x86_get_func( &softpipe->fs->sse2_program );
+      func(
+         machine->Inputs,
+         machine->Outputs,
+         machine->Consts,
+         machine->Temps,
+         machine->InterpCoefs
+#if 0
+         ,machine->QuadPos
+#endif
+           );
+      quad->mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
+   }
+   else
+#endif
+   {
+      quad->mask &= tgsi_exec_machine_run( machine );
+   }
+
+   /* store result color */
+   if (qss->colorOutSlot >= 0) {
+      /* XXX need to handle multiple color outputs someday */
+      assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot]
+             == TGSI_SEMANTIC_COLOR);
+      memcpy(
+             quad->outputs.color,
+             &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0],
+             sizeof( quad->outputs.color ) );
+   }
+
+   /*
+    * XXX the following code for updating quad->outputs.depth
+    * isn't really needed if we did early z testing.
+    */
+
+   /* store result Z */
+   if (qss->depthOutSlot >= 0) {
+      /* output[slot] is new Z */
+      uint i;
+      for (i = 0; i < 4; i++) {
+         quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i];
+      }
+   }
+   else {
+      /* copy input Z (which was interpolated by the executor) to output Z */
+      uint i;
+      for (i = 0; i < 4; i++) {
+         quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i];
+         /* XXX not sure the above line is always correct.  The following
+          * might be better:
+         quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i];
+          */
+      }
+   }
+
+   /* shader may cull fragments */
+   if( quad->mask ) {
+      qs->next->run( qs->next, quad );
+   }
+}
+
+#if 0
+#ifdef MESA_LLVM
+#define DLLVM 0
+static void
+shade_quad_llvm(struct quad_stage *qs,
+                struct quad_header *quad)
+{
+   struct quad_shade_stage *qss = quad_shade_stage(qs);
+   struct softpipe_context *softpipe = qs->softpipe;
+   float dests[4][16][4] ALIGN16_ATTRIB;
+   float inputs[4][16][4] ALIGN16_ATTRIB;
+   const float fx = (float) quad->x0;
+   const float fy = (float) quad->y0;
+   struct gallivm_prog *llvm = qss->llvm_prog;
+
+   inputs[0][0][0] = fx;
+   inputs[1][0][0] = fx + 1.0f;
+   inputs[2][0][0] = fx;
+   inputs[3][0][0] = fx + 1.0f;
+
+   inputs[0][0][1] = fy;
+   inputs[1][0][1] = fy;
+   inputs[2][0][1] = fy + 1.0f;
+   inputs[3][0][1] = fy + 1.0f;
+#if DLLVM
+   debug_printf("MASK = %d\n", quad->mask);
+#endif
+   gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef);
+#if DLLVM
+   for (int i = 0; i < 4; ++i) {
+      for (int j = 0; j < 2; ++j) {
+         debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j, 
+                inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]);
+      }
+   }
+#endif
+
+   quad->mask &=
+      gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs,
+                                   softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
+                                   qss->samplers);
+#if DLLVM
+   debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
+          dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], 
+          dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]);
+#endif
+
+   /* store result color */
+   if (qss->colorOutSlot >= 0) {
+      unsigned i;
+      /* XXX need to handle multiple color outputs someday */
+      assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot]
+             == TGSI_SEMANTIC_COLOR);
+      for (i = 0; i < QUAD_SIZE; ++i) {
+         quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0];
+         quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1];
+         quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2];
+         quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3];
+      }
+   }
+#if DLLVM
+   for (int i = 0; i < QUAD_SIZE; ++i) {
+      debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
+             quad->outputs.color[0][i],
+             quad->outputs.color[1][i],
+             quad->outputs.color[2][i],
+             quad->outputs.color[3][i]);
+   }
+#endif
+
+   /* store result Z */
+   if (qss->depthOutSlot >= 0) {
+      /* output[slot] is new Z */
+      uint i;
+      for (i = 0; i < 4; i++) {
+         quad->outputs.depth[i] = dests[i][0][2];
+      }
+   }
+   else {
+      /* copy input Z (which was interpolated by the executor) to output Z */
+      uint i;
+      for (i = 0; i < 4; i++) {
+         quad->outputs.depth[i] = inputs[i][0][2];
+      }
+   }
+#if DLLVM
+   debug_printf("D [%f, %f, %f, %f] mask = %d\n",
+             quad->outputs.depth[0],
+             quad->outputs.depth[1],
+             quad->outputs.depth[2],
+             quad->outputs.depth[3], quad->mask);
+#endif
+
+   /* shader may cull fragments */
+   if( quad->mask ) {
+      qs->next->run( qs->next, quad );
+   }
+}
+#endif /*MESA_LLVM*/
+#endif
+
+/**
+ * Per-primitive (or per-begin?) setup
+ */
+static void shade_begin(struct quad_stage *qs)
+{
+   struct quad_shade_stage *qss = quad_shade_stage(qs);
+   struct softpipe_context *softpipe = qs->softpipe;
+   unsigned i;
+
+   /* set TGSI sampler state that varies */
+   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+      qss->samplers[i].state = softpipe->sampler[i];
+      qss->samplers[i].texture = &softpipe->texture[i]->base;
+   }
+
+#ifdef MESA_LLVM
+   qss->llvm_prog = softpipe->fs->llvm_prog;
+#endif
+   /* XXX only do this if the fragment shader changes... */
+   tgsi_exec_machine_init(&qss->machine,
+                          softpipe->fs->shader.tokens,
+                          PIPE_MAX_SAMPLERS,
+                          qss->samplers );
+
+   /* find output slots for depth, color */
+   qss->colorOutSlot = -1;
+   qss->depthOutSlot = -1;
+   for (i = 0; i < qss->stage.softpipe->fs->shader.num_outputs; i++) {
+      switch (qss->stage.softpipe->fs->shader.output_semantic_name[i]) {
+      case TGSI_SEMANTIC_POSITION:
+         qss->depthOutSlot = i;
+         break;
+      case TGSI_SEMANTIC_COLOR:
+         qss->colorOutSlot = i;
+         break;
+      }
+   }
+
+   qs->next->begin(qs->next);
+}
+
+
+static void shade_destroy(struct quad_stage *qs)
+{
+   struct quad_shade_stage *qss = (struct quad_shade_stage *) qs;
+
+   tgsi_exec_machine_free_data(&qss->machine);
+   FREE( qss->inputs );
+   FREE( qss->outputs );
+   FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
+{
+   struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
+   uint i;
+
+   /* allocate storage for program inputs/outputs, aligned to 16 bytes */
+   qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16);
+   qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16);
+   qss->machine.Inputs = align16(qss->inputs);
+   qss->machine.Outputs = align16(qss->outputs);
+
+   qss->stage.softpipe = softpipe;
+   qss->stage.begin = shade_begin;
+#ifdef MESA_LLVM
+   /* disable until ported to accept
+    * x/y and soa layout
+   qss->stage.run = shade_quad_llvm;
+   */
+   softpipe->use_sse = FALSE;
+   qss->stage.run = shade_quad;
+#else
+   qss->stage.run = shade_quad;
+#endif
+   qss->stage.destroy = shade_destroy;
+
+   /* set TGSI sampler state that's constant */
+   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+      assert(softpipe->tex_cache[i]);
+      qss->samplers[i].get_samples = sp_get_samples;
+      qss->samplers[i].pipe = &softpipe->pipe;
+      qss->samplers[i].cache = softpipe->tex_cache[i];
+   }
+
+   return &qss->stage;
+}
author	José Fonseca <jrfonseca@tungstengraphics.com>	2008-02-15 17:35:24 +0900
committer	José Fonseca <jrfonseca@tungstengraphics.com>	2008-02-15 17:45:40 +0900
commit	b642730be93149baa7556e5791393168ab396175 (patch)
tree	67d174be5d4814261b93bc8b20c027a26d1c3d06 /src/gallium/drivers/softpipe/sp_quad_fs.c
parent	4593be34b2a6e494f0e476c8aa8e1d2633fffd47 (diff)