4 files changed, 381 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9a6ee7a010..5dada65909 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -39,6 +39,20 @@ extern "C" {
 #include "../glsl/ir_optimization.h"
 #include "../glsl/ir_print_visitor.h"
 
+enum register_file { /* Short aliases for the BRW_* register file codes. */
+   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+   GRF = BRW_GENERAL_REGISTER_FILE,
+   MRF = BRW_MESSAGE_REGISTER_FILE,
+   IMM = BRW_IMMEDIATE_VALUE,
+   BAD_FILE /* No BRW_* counterpart: marks an unset or unused fs_reg. */
+};
+
+enum fs_opcodes { /* Extra opcodes stored in fs_inst::opcode next to BRW_OPCODE_*. */
+   FS_OPCODE_FB_WRITE = 256, /* presumably above every BRW_OPCODE_* value -- TODO confirm */
+};
+
+static int using_new_fs = -1; /* -1 until brw_link_shader() checks INTEL_NEW_FS; then 0 or 1. */
+
 struct gl_shader *
 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
 {
@@ -77,18 +91,31 @@ brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 GLboolean
 brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
 {
-   static int using_new_fs = -1;
-
    if (using_new_fs == -1)
       using_new_fs = getenv("INTEL_NEW_FS") != NULL;
 
    for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
-      struct gl_shader *shader = prog->_LinkedShaders[i];
+      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
+
+      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
+	 void *mem_ctx = talloc_new(NULL);
+	 bool progress;
+
+	 shader->ir = new(shader) exec_list;
+	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
 
-      if (using_new_fs && shader->Type == GL_FRAGMENT_SHADER) {
 	 do_mat_op_to_vec(shader->ir);
 	 brw_do_channel_expressions(shader->ir);
 	 brw_do_vector_splitting(shader->ir);
+
+	 do {
+	    progress = false;
+
+	    progress = do_common_optimization(shader->ir, true) || progress;
+	 } while (progress);
+
+	 reparent_ir(shader->ir, shader);
+	 talloc_free(mem_ctx);
       }
    }
 
@@ -97,3 +124,323 @@ brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
 
    return GL_TRUE;
 }
+
+class fs_reg { /* One operand (source or destination) of an fs_inst. */
+public:
+   fs_reg() /* Unset operand: BAD_FILE with hw_reg -1; type and imm are not written. */
+   {
+      this->file = BAD_FILE;
+      this->reg = 0;
+      this->hw_reg = -1;
+   }
+
+   fs_reg(float f) /* Float immediate. */
+   {
+      this->file = IMM;
+      this->reg = 0;
+      this->hw_reg = 0;
+      this->type = BRW_REGISTER_TYPE_F;
+      this->imm.f = f;
+   }
+
+   fs_reg(int32_t i) /* Signed 32-bit immediate. */
+   {
+      this->file = IMM;
+      this->reg = 0;
+      this->hw_reg = 0;
+      this->type = BRW_REGISTER_TYPE_D;
+      this->imm.i = i;
+   }
+
+   fs_reg(uint32_t u) /* Unsigned 32-bit immediate. */
+   {
+      this->file = IMM;
+      this->reg = 0;
+      this->hw_reg = 0;
+      this->type = BRW_REGISTER_TYPE_UD;
+      this->imm.u = u;
+   }
+
+   fs_reg(enum register_file file, int hw_reg) /* Fixed hardware register, typed as float. */
+   {
+      this->file = file;
+      this->reg = 0;
+      this->hw_reg = hw_reg;
+      this->type = BRW_REGISTER_TYPE_F;
+   }
+
+   /** Register file: ARF, GRF, MRF, IMM. */
+   enum register_file file;
+   /** Abstract register number.  0 = fixed hw reg */
+   int reg;
+   /** HW register number.  Generally unset until register allocation. */
+   int hw_reg;
+   /** Register type.  BRW_REGISTER_TYPE_* */
+   int type;
+
+   /** Value for file == IMM (BRW_IMMEDIATE_VALUE); the member in use follows type. */
+   union {
+      int32_t i;
+      uint32_t u;
+      float f;
+   } imm;
+};
+
+static const fs_reg reg_undef(BAD_FILE, -1); /* Stand-in for an operand an instruction does not use. */
+static const fs_reg reg_null(ARF, BRW_ARF_NULL); /* ARF register number BRW_ARF_NULL. */
+
+class fs_inst : public exec_node { /* One instruction queued on fs_visitor::instructions. */
+public:
+   /* Callers of this talloc-based new need not call delete. It's
+    * easier to just talloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = talloc_size(ctx, size);
+      assert(node != NULL); /* allocation failure is only caught when asserts are enabled */
+
+      return node;
+   }
+
+   fs_inst() /* NOP with destination and both sources undefined. */
+   {
+      this->opcode = BRW_OPCODE_NOP;
+      this->dst = reg_undef;
+      this->src[0] = reg_undef;
+      this->src[1] = reg_undef;
+   }
+   fs_inst(int opcode, fs_reg dst, fs_reg src0) /* One-source form; src[1] is left undefined. */
+   {
+      this->opcode = opcode;
+      this->dst = dst;
+      this->src[0] = src0;
+      this->src[1] = reg_undef;
+   }
+   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) /* Two-source form. */
+   {
+      this->opcode = opcode;
+      this->dst = dst;
+      this->src[0] = src0;
+      this->src[1] = src1;
+   }
+
+   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+   fs_reg dst;
+   fs_reg src[2];
+};
+
+class fs_visitor : public ir_hierarchical_visitor
+{
+public:
+   /* Owns a private talloc context (mem_ctx) that backs every emitted fs_inst. */
+   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
+   {
+      this->c = c;
+      this->p = &c->func;
+      this->mem_ctx = talloc_new(NULL);
+      this->shader = shader;
+      this->grf_used = 0; /* stays 0 until generate_code() fills it in */
+   }
+   ~fs_visitor()
+   {
+      talloc_free(this->mem_ctx);
+   }
+
+   fs_inst *emit(fs_inst inst);
+   void generate_code();
+   void generate_fb_write(fs_inst *inst);
+
+   void emit_dummy_fs();
+
+   struct brw_wm_compile *c;
+   struct brw_compile *p; /* alias of &c->func */
+   struct brw_shader *shader;
+   void *mem_ctx; /* freed by the destructor, together with the instructions */
+   exec_list instructions; /* fs_inst nodes in emission order */
+
+   int grf_used; /* read by brw_wm_fs_emit() as prog_data.total_grf */
+};
+
+fs_inst *
+fs_visitor::emit(fs_inst inst)
+{
+   /* Copy the by-value instruction into storage owned by mem_ctx. */
+   fs_inst *node = new(mem_ctx) fs_inst;
+   *node = inst;
+   /* Queue it at the end of the program and hand the stored copy back. */
+   instructions.push_tail(node);
+   return node;
+}
+
+/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
+void
+fs_visitor::emit_dummy_fs()
+{
+   /* Everyone's favorite color. */
+   emit(fs_inst(BRW_OPCODE_MOV,
+		fs_reg(MRF, 2),
+		fs_reg(1.0f)));
+   emit(fs_inst(BRW_OPCODE_MOV,
+		fs_reg(MRF, 3),
+		fs_reg(0.0f)));
+   emit(fs_inst(BRW_OPCODE_MOV,
+		fs_reg(MRF, 4),
+		fs_reg(1.0f)));
+   emit(fs_inst(BRW_OPCODE_MOV,
+		fs_reg(MRF, 5),
+		fs_reg(0.0f)));
+
+   /* Operands are placeholders: generate_fb_write() ignores the instruction. */
+   emit(fs_inst(FS_OPCODE_FB_WRITE,
+		fs_reg(0),
+		fs_reg(0)));
+}
+
+void
+fs_visitor::generate_fb_write(fs_inst *inst) /* inst is not read by this function yet */
+{
+   GLboolean eot = 1; /* FINISHME: MRT */
+   /* FINISHME: AADS */
+
+   /* The message header is two registers holding g0 and g1.  g0 is left
+    * to the implied move; g1 is copied into m1 explicitly here.
+    */
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p,
+	   brw_message_reg(1),
+	   brw_vec8_grf(1, 0));
+   brw_pop_insn_state(p);
+
+   int nr = 2 + 4; /* 2 header regs + 4 payload regs, presumably m2..m5 from emit_dummy_fs() -- TODO confirm */
+
+   brw_fb_WRITE(p,
+		8, /* dispatch_width */
+		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+		0, /* base MRF */
+		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+		0, /* FINISHME: MRT target */
+		nr,
+		0,
+		eot);
+}
+
+void
+fs_visitor::generate_code()
+{
+   this->grf_used = 2; /* header */
+   /* Lower every queued fs_inst to native code through the brw_* emitters. */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      fs_inst *inst = (fs_inst *)iter.get();
+      struct brw_reg src[2], dst;
+      /* Translate each fs_reg source into the brw_reg the emitters expect. */
+      for (unsigned int i = 0; i < 2; i++) {
+	 switch (inst->src[i].file) {
+	 case GRF:
+	 case ARF:
+	 case MRF:
+	    src[i] = brw_vec8_reg(inst->src[i].file,
+				  inst->src[i].hw_reg, 0);
+	    src[i] = retype(src[i], inst->src[i].type);
+	    break;
+	 case IMM:
+	    switch (inst->src[i].type) {
+	    case BRW_REGISTER_TYPE_F:
+	       src[i] = brw_imm_f(inst->src[i].imm.f);
+	       break;
+	    case BRW_REGISTER_TYPE_D:
+	       src[i] = brw_imm_d(inst->src[i].imm.i); /* keep the signed integer encoding */
+	       break;
+	    case BRW_REGISTER_TYPE_UD:
+	       src[i] = brw_imm_ud(inst->src[i].imm.u); /* keep the unsigned integer encoding */
+	       break;
+	    default:
+	       assert(!"not reached");
+	       break;
+	    }
+	    break;
+	 case BAD_FILE:
+	    src[i] = brw_null_reg(); /* Probably unused. */
+	    break;
+	 }
+      }
+      dst = brw_vec8_reg(inst->dst.file, inst->dst.hw_reg, 0);
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MOV:
+	 brw_MOV(p, dst, src[0]);
+	 break;
+      case FS_OPCODE_FB_WRITE:
+	 generate_fb_write(inst);
+	 break;
+      default:
+	 assert(!"not reached");
+      }
+   }
+}
+
+GLboolean /* GL_TRUE when this backend emitted the program; GL_FALSE makes do_wm_prog() fall back. */
+brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   struct brw_shader *shader = NULL;
+   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
+
+   if (!prog) /* no current shader program */
+      return GL_FALSE;
+
+   if (!using_new_fs) /* backend not enabled through INTEL_NEW_FS */
+      return GL_FALSE;
+
+   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { /* pick the first linked fragment shader */
+      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
+	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
+	 break;
+      }
+   }
+   if (!shader)
+      return GL_FALSE;
+
+   /* We always use 8-wide mode, at least for now.  For one, flow
+    * control only works in 8-wide.  Also, when we're fragment shader
+    * bound, we're almost always under register pressure as well, so
+    * 8-wide would save us from the performance cliff of spilling
+    * regs.
+    */
+   c->dispatch_width = 8;
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n");
+   }
+
+   /* Now the main event: Visit the shader IR and generate our FS IR for it.
+    */
+   fs_visitor v(c, shader);
+   visit_list_elements(&v, shader->ir);
+
+   v.emit_dummy_fs(); /* bringup: queues the fixed dummy shader instructions */
+
+   v.generate_code();
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("Native code for fragment shader %d:\n", prog->Name);
+      for (unsigned int i = 0; i < p->nr_insn; i++)
+	 brw_disasm(stdout, &p->store[i], intel->gen);
+      printf("\n");
+   }
+
+   c->prog_data.nr_params = 0; /* FINISHME */
+   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
+   c->prog_data.urb_read_length = 1; /* FINISHME: attrs */
+   c->prog_data.curb_read_length = 0; /* FINISHME */
+   c->prog_data.total_grf = v.grf_used; /* set by generate_code() */
+   c->prog_data.total_scratch = 0;
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 34cefeea32..899e9b1dfb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -177,17 +177,19 @@ static void do_wm_prog( struct brw_context *brw,
    /* temporary sanity check assertion */
    ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
 
-   /*
-    * Shader which use GLSL features such as flow control are handled
-    * differently from "simple" shaders.
-    */
-   if (fp->isGLSL) {
-      c->dispatch_width = 8;
-      brw_wm_glsl_emit(brw, c);
-   }
-   else {
-      c->dispatch_width = 16;
-      brw_wm_non_glsl_emit(brw, c);
+   if (!brw_wm_fs_emit(brw, c)) {
+      /*
+       * Shader which use GLSL features such as flow control are handled
+       * differently from "simple" shaders.
+       */
+      if (fp->isGLSL) {
+	 c->dispatch_width = 8;
+	 brw_wm_glsl_emit(brw, c);
+      }
+      else {
+	 c->dispatch_width = 16;
+	 brw_wm_non_glsl_emit(brw, c);
+      }
    }
 
    if (INTEL_DEBUG & DEBUG_WM)
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 6a761e723b..2639d4f26b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -306,6 +306,7 @@ void brw_wm_lookup_iz( GLuint line_aa,
 
 GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
 void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);
 
 /* brw_wm_emit.c */
 void emit_alu1(struct brw_compile *p,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index c1cf4db1ca..6699d0a73e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -104,8 +104,22 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
    key->is_glsl = bfp->isGLSL;
 
-   /* temporary sanity check assertion */
-   ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+   /* If using the fragment shader backend, the program is always
+    * 8-wide.
+    */
+   if (ctx->Shader.CurrentProgram) {
+      int i;
+
+      for (i = 0; i < ctx->Shader.CurrentProgram->_NumLinkedShaders; i++) {
+	 struct brw_shader *shader =
+	    (struct brw_shader *)ctx->Shader.CurrentProgram->_LinkedShaders[i];;
+
+	 if (shader->base.Type == GL_FRAGMENT_SHADER &&
+	     shader->ir != NULL) {
+	    key->is_glsl = GL_TRUE;
+	 }
+      }
+   }
 
    /* _NEW_DEPTH */
    key->stats_wm = intel->stats_wm;