From 4c2247538394a313e1e90bfcd07c1ab9c7d41281 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 12 Nov 2010 15:17:40 +0100
Subject: nvc0: import nvc0 gallium driver

---
 src/gallium/drivers/nvc0/nvc0_pc.h | 648 +++++++++++++++++++++++++++++++++++++
 1 file changed, 648 insertions(+)
 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc.h

(limited to 'src/gallium/drivers/nvc0/nvc0_pc.h')

diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
new file mode 100644
index 0000000000..b48b0b1fba
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -0,0 +1,648 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NVC0_COMPILER_H__
+#define __NVC0_COMPILER_H__
+
+#include <stdio.h>
+
+#ifndef NOUVEAU_DBG /* an earlier definition takes precedence */
+#ifdef NOUVEAU_DEBUG
+# define NOUVEAU_DBG(args...) debug_printf(args) /* caller supplies the ';' */
+#else
+# define NOUVEAU_DBG(args...) /* expands to nothing without NOUVEAU_DEBUG */
+#endif
+#endif
+
+#ifndef NOUVEAU_ERR /* an earlier definition takes precedence */
+#define NOUVEAU_ERR(fmt, args...) /* one statement; caller supplies the ';' */ \
+   fprintf(stderr, "%s:%d -  "fmt, __FUNCTION__, __LINE__, ##args)
+#endif
+
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+
+/* pseudo opcodes */
+#define NV_OP_UNDEF      0
+#define NV_OP_BIND       1
+#define NV_OP_MERGE      2
+#define NV_OP_PHI        3
+#define NV_OP_SELECT     4
+#define NV_OP_NOP        5
+
+/**
+ * BIND forces source operand i into the same register as destination operand i
+ * SELECT forces its multiple source operands and its destination operand into
+ *  one and the same register.
+ */
+
+/* base opcodes */
+#define NV_OP_LD         6
+#define NV_OP_ST         7
+#define NV_OP_MOV        8
+#define NV_OP_AND        9
+#define NV_OP_OR        10
+#define NV_OP_XOR       11
+#define NV_OP_SHL       12
+#define NV_OP_SHR       13
+#define NV_OP_NOT       14
+#define NV_OP_SET       15
+#define NV_OP_ADD       16
+#define NV_OP_SUB       17
+#define NV_OP_MUL       18
+#define NV_OP_MAD       19
+#define NV_OP_ABS       20
+#define NV_OP_NEG       21
+#define NV_OP_MAX       22
+#define NV_OP_MIN       23
+#define NV_OP_CVT       24
+#define NV_OP_CEIL      25
+#define NV_OP_FLOOR     26
+#define NV_OP_TRUNC     27
+#define NV_OP_SAD       28
+
+/* shader opcodes */
+#define NV_OP_VFETCH    29
+#define NV_OP_PFETCH    30
+#define NV_OP_EXPORT    31
+#define NV_OP_LINTERP   32
+#define NV_OP_PINTERP   33
+#define NV_OP_EMIT      34
+#define NV_OP_RESTART   35
+#define NV_OP_TEX       36
+#define NV_OP_TXB       37
+#define NV_OP_TXL       38
+#define NV_OP_TXF       39
+#define NV_OP_TXQ       40
+#define NV_OP_QUADOP    41
+#define NV_OP_DFDX      42
+#define NV_OP_DFDY      43
+#define NV_OP_KIL       44
+
+/* control flow opcodes */
+#define NV_OP_BRA       45
+#define NV_OP_CALL      46
+#define NV_OP_RET       47
+#define NV_OP_EXIT      48
+#define NV_OP_BREAK     49
+#define NV_OP_BREAKADDR 50
+#define NV_OP_JOINAT    51
+#define NV_OP_JOIN      52
+
+/* typed opcodes */
+#define NV_OP_ADD_F32   NV_OP_ADD
+#define NV_OP_ADD_B32   53
+#define NV_OP_MUL_F32   NV_OP_MUL
+#define NV_OP_MUL_B32   54
+#define NV_OP_ABS_F32   NV_OP_ABS
+#define NV_OP_ABS_S32   55
+#define NV_OP_NEG_F32   NV_OP_NEG
+#define NV_OP_NEG_S32   56
+#define NV_OP_MAX_F32   NV_OP_MAX
+#define NV_OP_MAX_S32   57
+#define NV_OP_MAX_U32   58
+#define NV_OP_MIN_F32   NV_OP_MIN
+#define NV_OP_MIN_S32   59
+#define NV_OP_MIN_U32   60
+#define NV_OP_SET_F32   61
+#define NV_OP_SET_S32   62
+#define NV_OP_SET_U32   63
+#define NV_OP_SAR       64
+#define NV_OP_RCP       65
+#define NV_OP_RSQ       66
+#define NV_OP_LG2       67
+#define NV_OP_SIN       68
+#define NV_OP_COS       69
+#define NV_OP_EX2       70
+#define NV_OP_PRESIN    71
+#define NV_OP_PREEX2    72
+#define NV_OP_SAT       73
+
+/* newly added opcodes */
+#define NV_OP_SET_F32_AND 74
+#define NV_OP_SET_F32_OR  75
+#define NV_OP_SET_F32_XOR 76
+#define NV_OP_SELP        77
+#define NV_OP_SLCT        78
+#define NV_OP_SLCT_F32    NV_OP_SLCT
+#define NV_OP_SLCT_S32    79
+#define NV_OP_SLCT_U32    80
+#define NV_OP_SUB_F32     NV_OP_SUB
+#define NV_OP_SUB_S32     81
+#define NV_OP_MAD_F32     NV_OP_MAD
+#define NV_OP_FSET_F32    82
+
+#define NV_OP_COUNT     83
+
+/* nv50 files omitted */
+#define NV_FILE_GPR      0
+#define NV_FILE_COND     1
+#define NV_FILE_PRED     2
+#define NV_FILE_IMM      16
+#define NV_FILE_MEM_S    32
+#define NV_FILE_MEM_V    34
+#define NV_FILE_MEM_A    35
+#define NV_FILE_MEM_L    48
+#define NV_FILE_MEM_G    64
+#define NV_FILE_MEM_C(i) (80 + (i)) /* ids from 80 upwards, one per index i */
+
+#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S) /* every MEM_* id; not GPR/COND/PRED/IMM */
+
+#define NV_MOD_NEG 1
+#define NV_MOD_ABS 2
+#define NV_MOD_NOT 4
+#define NV_MOD_SAT 8
+
+#define NV_TYPE_U8  0x00
+#define NV_TYPE_S8  0x01
+#define NV_TYPE_U16 0x02
+#define NV_TYPE_S16 0x03
+#define NV_TYPE_U32 0x04
+#define NV_TYPE_S32 0x05
+#define NV_TYPE_P32 0x07
+#define NV_TYPE_F32 0x09
+#define NV_TYPE_F64 0x0b
+#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | ((n) << 4)) /* component count n goes above bit 3 */
+#define NV_TYPE_ANY 0xff
+
+#define NV_TYPE_ISINT(t) ((t) < 7) /* ids below 7: U8..S32; P32 (7) is excluded */
+#define NV_TYPE_ISSGD(t) ((t) & 1) /* odd ids: S8, S16, S32, P32, F32, F64 */
+
+#define NV_CC_FL 0x0
+#define NV_CC_LT 0x1
+#define NV_CC_EQ 0x2
+#define NV_CC_LE 0x3
+#define NV_CC_GT 0x4
+#define NV_CC_NE 0x5
+#define NV_CC_GE 0x6
+#define NV_CC_U  0x8
+#define NV_CC_TR 0xf
+#define NV_CC_O  0x10
+#define NV_CC_C  0x11
+#define NV_CC_A  0x12
+#define NV_CC_S  0x13
+
+#define NV_PC_MAX_INSTRUCTIONS 2048
+#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
+
+#define NV_PC_MAX_BASIC_BLOCKS 1024
+
+struct nv_op_info { /* one entry of nvc0_op_info_table, which is indexed by opcode */
+   uint base;                /* e.g. ADD_S32 -> ADD; read via NV_BASEOP()/nv_op_base() */
+   char name[12];
+   uint8_t type;             /* read via NV_OPTYPE() */
+   uint8_t mods;             /* returned by nv_op_supported_src_mods() */
+   unsigned flow        : 1;
+   unsigned commutative : 1; /* reported by nv_op_commutative() */
+   unsigned vector      : 1; /* reported by nv_is_vector_op() */
+   unsigned predicate   : 1; /* reported by nv_op_predicateable() */
+   unsigned pseudo      : 1;
+   unsigned immediate   : 3;
+   unsigned memory      : 3;
+};
+
+extern struct nv_op_info nvc0_op_info_table[];
+
+#define NV_BASEOP(op) (nvc0_op_info_table[op].base)
+#define NV_OPTYPE(op) (nvc0_op_info_table[op].type)
+
+static INLINE uint
+nv_op_base(uint opcode)
+{
+   return NV_BASEOP(opcode); /* 'base' field of the opcode's table entry */
+}
+
+static INLINE boolean
+nv_is_texture_op(uint opcode)
+{
+   return !(opcode < NV_OP_TEX || opcode > NV_OP_TXQ); /* TEX..TXQ are 36..40 */
+}
+
+static INLINE boolean
+nv_is_vector_op(uint opcode)
+{
+   return (nvc0_op_info_table + opcode)->vector ? TRUE : FALSE; /* bitfield -> boolean */
+}
+
+static INLINE boolean
+nv_op_commutative(uint opcode)
+{
+   return (nvc0_op_info_table + opcode)->commutative ? TRUE : FALSE; /* bitfield -> boolean */
+}
+
+static INLINE uint8_t
+nv_op_supported_src_mods(uint opcode)
+{
+   return (nvc0_op_info_table + opcode)->mods; /* 'mods' field of the opcode's table entry */
+}
+
+static INLINE boolean
+nv_op_predicateable(uint opcode)
+{
+   return (nvc0_op_info_table + opcode)->predicate ? TRUE : FALSE; /* bitfield -> boolean */
+}
+
+static INLINE uint
+nv_type_order(ubyte type)
+{
+   const uint base = type & 0xf; /* only the low nibble selects the base type */
+
+   if (base == NV_TYPE_U8 || base == NV_TYPE_S8)
+      return 0; /* size is 1 << 0 */
+   if (base == NV_TYPE_U16 || base == NV_TYPE_S16)
+      return 1; /* size is 1 << 1 */
+   if (base == NV_TYPE_U32 || base == NV_TYPE_S32)
+      return 2; /* size is 1 << 2 */
+   if (base == NV_TYPE_P32 || base == NV_TYPE_F32)
+      return 2;
+   if (base == NV_TYPE_F64)
+      return 3; /* size is 1 << 3 */
+
+   /* every other base type id is rejected */
+   assert(0);
+
+   return 0;
+}
+
+static INLINE uint
+nv_type_sizeof(ubyte type)
+{
+   const uint elem = 1 << nv_type_order(type); /* size of one component */
+   const uint count = type >> 4;               /* 0 when no vector count is encoded */
+   return count ? elem * count : elem;
+}
+
+static INLINE uint
+nv_type_sizeof_base(ubyte type)
+{
+   return (uint)1 << nv_type_order(type); /* the vector count in 'type' is ignored */
+}
+
+struct nv_reg {
+   uint32_t address; /* for memory locations */
+   int id; /* for registers */
+   ubyte file;
+   ubyte size;
+   union {
+      int32_t s32;
+      int64_t s64;
+      uint64_t u64;
+      uint32_t u32;
+      float f32;
+      double f64;
+   } imm;
+};
+
+struct nv_range {
+   struct nv_range *next;
+   int bgn;
+   int end;
+};
+
+struct nv_ref;
+
+struct nv_value { /* handed out from nv_pc.values by new_value() */
+   struct nv_reg reg; /* new_value() sets file and size and starts id at -1 */
+   struct nv_instruction *insn;
+   struct nv_value *join;   /* new_value() points this at the value itself */
+   struct nv_ref *last_use; /* ref most recently attached by nv_reference() */
+   int n;                   /* index of this value in nv_pc.values */
+   struct nv_range *livei;
+   int refc;                /* adjusted by new_ref() and nv_reference() */
+   struct nv_value *next;
+   struct nv_value *prev;
+};
+
+struct nv_ref { /* one use of a value; created by new_ref() */
+   struct nv_value *value;      /* value referenced; set by new_ref()/nv_reference() */
+   struct nv_instruction *insn; /* instruction whose src[] holds this ref */
+   struct list_head list; /* connects uses of the same value */
+   uint8_t mod;                 /* presumably NV_MOD_* bits -- TODO confirm */
+   uint8_t flags;
+};
+
+struct nv_basic_block;
+
+struct nv_instruction { /* handed out from nv_pc.instructions by nv_alloc_instruction() */
+   struct nv_instruction *next; /* presumably neighbours inside the owning block -- TODO confirm */
+   struct nv_instruction *prev;
+   uint opcode; /* one of the NV_OP_* numbers */
+   uint serial;
+
+   struct nv_value *def[5]; /* nvi_vector_size() counts the leading non-NULL entries */
+   struct nv_ref *src[6];   /* slots are filled and cleared through nv_reference() */
+
+   int8_t predicate; /* index of predicate src; -1 after nv_alloc_instruction() */
+   int8_t indirect;  /* index of pointer src; -1 after nv_alloc_instruction() */
+
+   union {
+      struct {
+         uint8_t t; /* TIC binding */
+         uint8_t s; /* TSC binding */
+      } tex;
+      struct {
+         uint8_t d; /* output type */
+         uint8_t s; /* input type */
+      } cvt;
+   } ext;
+
+   struct nv_basic_block *bb;
+   struct nv_basic_block *target; /* target block of control flow insn */
+
+   unsigned cc         : 5; /* condition code; set to 0 by nv_alloc_instruction() */
+   unsigned fixed      : 1; /* don't optimize away (prematurely) */
+   unsigned terminator : 1;
+   unsigned join       : 1;
+   unsigned set_cond   : 4; /* 2nd byte */
+   unsigned saturate   : 1;
+   unsigned centroid   : 1;
+   unsigned flat       : 1;
+   unsigned patch      : 1;
+   unsigned lanes      : 4; /* 3rd byte */
+   unsigned tex_argc   : 3;
+   unsigned tex_live   : 1;
+   unsigned tex_cube   : 1; /* 4th byte */
+   unsigned tex_mask   : 4;
+
+   uint8_t quadop;
+};
+
+static INLINE int
+nvi_vector_size(struct nv_instruction *nvi)
+{
+   int n = 0;
+   assert(nvi);
+   while (n < 5 && nvi->def[n]) ++n; /* count the leading non-NULL def[] entries */
+   return n;
+}
+
+#define CFG_EDGE_FORWARD     0
+#define CFG_EDGE_BACK        1
+#define CFG_EDGE_LOOP_ENTER  2
+#define CFG_EDGE_LOOP_LEAVE  4
+#define CFG_EDGE_FAKE        8
+
+/* a 'WALL' edge (BACK or FAKE) is one that the reachability check does not follow */
+/* a 'LOOP' edge is any edge involved in a loop: BACK, LOOP_ENTER or LOOP_LEAVE */
+#define IS_LOOP_EDGE(k) ((k) & 7)
+#define IS_WALL_EDGE(k) ((k) & 9)
+
+struct nv_basic_block { /* allocated and registered by new_basic_block() */
+   struct nv_instruction *entry; /* first non-phi instruction */
+   struct nv_instruction *exit;
+   struct nv_instruction *phi; /* very first instruction */
+   int num_instructions;
+
+   struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
+   struct nv_basic_block *in[8]; /* hope that suffices */
+   uint num_in;       /* presumably the number of used in[] entries -- TODO confirm */
+   ubyte out_kind[2]; /* presumably CFG_EDGE_* of the matching out[] edge -- TODO confirm */
+   ubyte in_kind[8];  /* presumably CFG_EDGE_* of the matching in[] edge -- TODO confirm */
+
+   int id; /* index in nv_pc.bb_list, assigned by new_basic_block() */
+   int subroutine;
+   uint priv; /* reset to 0 after you're done */
+   uint pass_seq;
+
+   uint32_t emit_pos; /* position, size in emitted code */
+   uint32_t emit_size;
+
+   uint32_t live_set[NV_PC_MAX_VALUES / 32];
+};
+
+struct nvc0_translation_info;
+
+struct nv_pc { /* state shared by the allocation helpers in this header */
+   struct nv_basic_block **root;
+   struct nv_basic_block *current_block; /* block that new_instruction() appends to */
+   struct nv_basic_block *parent_block;
+
+   int loop_nesting_bound;
+   uint pass_seq;
+
+   struct nv_value values[NV_PC_MAX_VALUES];                   /* pool used by new_value() */
+   struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; /* pool used by nv_alloc_instruction() */
+   struct nv_ref **refs; /* pointer table that new_ref() grows 64 entries at a time */
+   struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS];     /* filled by new_basic_block() */
+   int num_values;       /* next free index in values[] */
+   int num_instructions; /* next free index in instructions[] */
+   int num_refs;         /* number of refs handed out by new_ref() */
+   int num_blocks;       /* number of entries used in bb_list[] */
+   int num_subroutines;
+
+   int max_reg[4];
+
+   uint32_t *immd_buf; /* populated on emit */
+   unsigned immd_count;
+
+   uint32_t *emit;
+   unsigned emit_size;
+   unsigned emit_pos;
+
+   void *reloc_entries;
+   unsigned num_relocs;
+
+   /* optimization enables */
+   boolean opt_reload_elim;
+   boolean is_fragprog;
+};
+
+void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *);
+void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *);
+
+static INLINE struct nv_instruction *
+nv_alloc_instruction(struct nv_pc *pc, uint opcode)
+{
+   struct nv_instruction *nvi;
+
+   /* the last entry of the pool is never handed out */
+   assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS - 1);
+   nvi = pc->instructions + pc->num_instructions++;
+
+   nvi->predicate = -1; /* no predicate source index */
+   nvi->indirect = -1;  /* no pointer source index */
+   nvi->opcode = opcode;
+   nvi->cc = 0;
+   return nvi;
+}
+
+static INLINE struct nv_instruction *
+new_instruction(struct nv_pc *pc, uint opcode)
+{
+   struct nv_instruction *nvi;
+   nvi = nv_alloc_instruction(pc, opcode);
+   nvc0_insn_append(pc->current_block, nvi); /* hand it to the block being built */
+   return nvi;
+}
+
+static INLINE struct nv_instruction *
+new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
+{
+   struct nv_instruction *nvi;
+   nvi = nv_alloc_instruction(pc, opcode);
+   nvc0_insn_insert_after(at, nvi); /* placement relative to 'at' is done by the callee */
+   return nvi;
+}
+
+static INLINE struct nv_value *
+new_value(struct nv_pc *pc, ubyte file, ubyte size)
+{
+   const int idx = pc->num_values;
+   struct nv_value *val = pc->values + idx;
+   assert(idx < NV_PC_MAX_VALUES - 1); /* the last pool entry is never handed out */
+   pc->num_values = idx + 1;
+   val->n = idx;
+   val->join = val;  /* initially joined with itself */
+   val->reg.file = file;
+   val->reg.size = size;
+   val->reg.id = -1; /* presumably "no register assigned yet" -- TODO confirm */
+   return val;
+}
+
+static INLINE struct nv_value *
+new_value_like(struct nv_pc *pc, struct nv_value *like)
+{
+   return new_value(pc, like->reg.file, like->reg.size); /* only file and size are taken from 'like' */
+}
+
+static INLINE struct nv_ref *
+new_ref(struct nv_pc *pc, struct nv_value *val)
+{
+   struct nv_ref *ref;
+   int k;
+
+   /* the pointer table is used up whenever the count is a multiple of 64 */
+   if (!(pc->num_refs % 64)) {
+      const unsigned cur_bytes = pc->num_refs * sizeof(pc->refs[0]);
+      const unsigned new_bytes = cur_bytes + 64 * sizeof(pc->refs[0]);
+      struct nv_ref *chunk;
+
+      /* add 64 table entries and back them with one CALLOC'ed chunk of refs */
+      pc->refs = REALLOC(pc->refs, cur_bytes, new_bytes);
+      chunk = CALLOC(64, sizeof(*chunk));
+      for (k = 0; k < 64; ++k)
+         pc->refs[pc->num_refs + k] = chunk + k;
+   }
+
+   ref = pc->refs[pc->num_refs++];
+   LIST_INITHEAD(&ref->list);
+   ref->value = val;
+   val->refc++; /* the new ref counts as one use of val */
+   return ref;
+}
+
+static INLINE struct nv_basic_block *
+new_basic_block(struct nv_pc *pc)
+{
+   struct nv_basic_block *bb;
+
+   if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS)
+      return NULL; /* bb_list is full */
+   bb = CALLOC_STRUCT(nv_basic_block);
+   if (!bb)
+      return NULL; /* allocation failed; reported like a full bb_list */
+   bb->id = pc->num_blocks; /* id doubles as the index in bb_list */
+   pc->bb_list[pc->num_blocks++] = bb;
+   return bb;
+}
+
+static INLINE void
+nv_reference(struct nv_pc *pc,
+             struct nv_instruction *nvi, int c, struct nv_value *s)
+{
+   struct nv_ref **d = &nvi->src[c]; /* source slot c of nvi; s == NULL clears the slot */
+   assert(c < 6);
+
+   if (*d) { /* slot in use: drop the old value's count and unlink the ref */
+      --(*d)->value->refc;
+      LIST_DEL(&(*d)->list);
+   }
+
+   if (s) {
+      if (!*d) {
+         *d = new_ref(pc, s); /* new_ref() already counts this use in s->refc */
+         (*d)->insn = nvi;
+      } else {
+         LIST_DEL(&(*d)->list); /* NOTE(review): second LIST_DEL of the same node, looks redundant */
+         (*d)->value = s; /* the existing nv_ref is reused for the new value */
+         ++(s->refc);
+      }
+      if (!s->last_use)
+         s->last_use = *d;
+      else
+         LIST_ADDTAIL(&s->last_use->list, &(*d)->list);
+
+      s->last_use = *d; /* NOTE(review): last_use of the previous value is not reset if it was this ref */
+      (*d)->insn = nvi;
+   } else {
+      *d = NULL; /* the nv_ref itself stays in pc->refs */
+   }
+}
+
+/* nvc0_emit.c */
+void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *);
+
+/* nvc0_print.c */
+const char *nvc0_opcode_name(uint opcode);
+void nvc0_print_instruction(struct nv_instruction *);
+
+/* nvc0_pc.c */
+void nvc0_print_function(struct nv_basic_block *root);
+void nvc0_print_program(struct nv_pc *);
+
+boolean nvc0_insn_can_load(struct nv_instruction *, int s,
+                           struct nv_instruction *);
+boolean nvc0_insn_is_predicateable(struct nv_instruction *);
+
+int nvc0_insn_refcount(struct nv_instruction *);
+void nvc0_insn_delete(struct nv_instruction *);
+void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *);
+
+void nvc0_bblock_attach(struct nv_basic_block *parent,
+                        struct nv_basic_block *child, ubyte edge_kind);
+boolean nvc0_bblock_dominated_by(struct nv_basic_block *,
+                                 struct nv_basic_block *);
+boolean nvc0_bblock_reachable_by(struct nv_basic_block *future,
+                                 struct nv_basic_block *past,
+                                 struct nv_basic_block *final);
+struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *);
+
+int nvc0_pc_replace_value(struct nv_pc *pc,
+                          struct nv_value *old_val,
+                          struct nv_value *new_val);
+
+struct nv_value *nvc0_pc_find_immediate(struct nv_ref *);
+struct nv_value *nvc0_pc_find_constant(struct nv_ref *);
+
+typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
+
+void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
+
+int nvc0_pc_exec_pass0(struct nv_pc *pc);
+int nvc0_pc_exec_pass1(struct nv_pc *pc);
+int nvc0_pc_exec_pass2(struct nv_pc *pc);
+
+int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *);
+
+#endif /* __NVC0_COMPILER_H__ */
-- 
cgit v1.2.3


From 51f22689a419a8a13ca105e8ffc905b5fadea0db Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 10 Dec 2010 20:13:40 +0100
Subject: nvc0: fix branching ops

- bra is PC relative
- jump to else condition was inverted
- handle integer comparisons
---
 src/gallium/drivers/nvc0/nvc0_pc.c         |  3 ++-
 src/gallium/drivers/nvc0/nvc0_pc.h         |  6 +++---
 src/gallium/drivers/nvc0/nvc0_pc_emit.c    | 28 ++++++++++++++++++++--------
 src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c |  2 +-
 4 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'src/gallium/drivers/nvc0/nvc0_pc.h')

diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
index e38f6ced24..cf7b8e347f 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc.c
@@ -328,6 +328,7 @@ nvc0_emit_program(struct nv_pc *pc)
 
    NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
 
+   pc->emit_pos = 0;
    for (n = 0; n < pc->num_blocks; ++n) {
       struct nv_instruction *i;
       struct nv_basic_block *b = pc->bb_list[n];
@@ -335,7 +336,7 @@ nvc0_emit_program(struct nv_pc *pc)
       for (i = b->entry; i; i = i->next) {
          nvc0_emit_instruction(pc, i);
          pc->emit += 2;
-         pc->emit_pos += 2;
+         pc->emit_pos += 8;
       }
    }
    assert(pc->emit == &code[pc->emit_size / 4]);
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
index b48b0b1fba..df0314965a 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.h
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -425,7 +425,7 @@ struct nv_basic_block {
    uint priv; /* reset to 0 after you're done */
    uint pass_seq;
 
-   uint32_t emit_pos; /* position, size in emitted code */
+   uint32_t emit_pos; /* position, size in emitted code (in bytes) */
    uint32_t emit_size;
 
    uint32_t live_set[NV_PC_MAX_VALUES / 32];
@@ -457,8 +457,8 @@ struct nv_pc {
    unsigned immd_count;
 
    uint32_t *emit;
-   unsigned emit_size;
-   unsigned emit_pos;
+   uint32_t emit_size;
+   uint32_t emit_pos;
 
    void *reloc_entries;
    unsigned num_relocs;
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
index 6735f93fd3..cd1ad03b00 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
@@ -73,7 +73,7 @@ create_fixup(struct nv_pc *pc, uint8_t ty,
 
    f = (struct nvc0_fixup *)pc->reloc_entries;
 
-   f[n].ofst = (pc->emit_pos + w) * 4;
+   f[n].ofst = pc->emit_pos + w * 4;
    f[n].type = ty;
    f[n].data = data;
    f[n].mask = m;
@@ -217,19 +217,26 @@ const_space_index(struct nv_instruction *i, int s)
 static void
 emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op)
 {
-   pc->emit[0] = 0x000001e7;
+   pc->emit[0] = 0x00000007;
    pc->emit[1] = op << 24;
 
-   set_pred(pc, i);
+   if (op == 0x40 || (op >= 0x80 && op <= 0x98)) {
+      /* bra, exit, ret or kil */
+      pc->emit[0] |= 0x1e0;
+      set_pred(pc, i);
+   }
 
    if (i->target) {
-      uint32_t pos = i->target->emit_pos;
+      int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8);
 
+      /* we will need relocations only for global functions */
+      /*
       create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000);
       create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff);
+      */
 
-      pc->emit[0] |= (pos & 0x3f) << 26;
-      pc->emit[1] |= (pos >> 6) & 0x1ffff;
+      pc->emit[0] |= (pcrel & 0x3f) << 26;
+      pc->emit[1] |= (pcrel >> 6) & 0x1ffff;
    }
 }
 
@@ -893,6 +900,11 @@ nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
       emit_mul_f32(pc, i);
       break;
    case NV_OP_SET_F32:
+   case NV_OP_SET_F32_AND:
+   case NV_OP_SET_F32_OR:
+   case NV_OP_SET_F32_XOR:
+   case NV_OP_SET_S32:
+   case NV_OP_SET_U32:
    case NV_OP_FSET_F32:
       emit_set(pc, i);
       break;
@@ -926,8 +938,8 @@ nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
       break;
    case NV_OP_JOIN:
    case NV_OP_NOP:
-      pc->emit[0] = 0x00003c00;
-      pc->emit[1] = 0x00000000;
+      pc->emit[0] = 0x00003de4;
+      pc->emit[1] = 0x40000000;
       break;
    case NV_OP_SELP:
       emit_selp(pc, i);
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
index a6797db9c5..26f9e735fb 100644
--- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
@@ -1494,7 +1494,7 @@ bld_instruction(struct bld_context *bld,
       bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
       bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
 
-      src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE,
+      src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ,
                       emit_fetch(bld, insn, 0, 0), bld->zero);
 
       bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0));
-- 
cgit v1.2.3


From 608b3c4432f7b7b0c27fc22369e09c8b7d8cfc03 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 19 Dec 2010 21:49:32 +0100
Subject: nvc0: improve shader support for texturing

Fixed shadow and cube texture fetches, add array texture fetches.
---
 src/gallium/drivers/nvc0/nvc0_pc.c          |   2 +-
 src/gallium/drivers/nvc0/nvc0_pc.h          |  12 ++-
 src/gallium/drivers/nvc0/nvc0_pc_emit.c     |  28 ++++--
 src/gallium/drivers/nvc0/nvc0_pc_print.c    |   2 +
 src/gallium/drivers/nvc0/nvc0_pc_regalloc.c |   2 +
 src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c  | 132 +++++++++++++++++++++-------
 6 files changed, 133 insertions(+), 45 deletions(-)

(limited to 'src/gallium/drivers/nvc0/nvc0_pc.h')

diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
index cf7b8e347f..72483f120e 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc.c
@@ -397,7 +397,7 @@ nvc0_generate_code(struct nvc0_translation_info *ti)
    if (ret)
       goto out;
 #if NOUVEAU_DEBUG > 1
-   nv_print_program(pc);
+   nvc0_print_program(pc);
    nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
 #endif
 
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
index df0314965a..74867f02e7 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.h
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -52,7 +52,8 @@
 #define NV_OP_NOP        5
 
 /**
- * BIND forces source operand i into the same register as destination operand i
+ * BIND forces source operand i into the same register as destination operand i,
+ *  and the operands will be assigned consecutive registers (needed for TEX)
  * SELECT forces its multiple source operands and its destination operand into
  *  one and the same register.
  */
@@ -152,8 +153,9 @@
 #define NV_OP_SUB_S32     81
 #define NV_OP_MAD_F32     NV_OP_MAD
 #define NV_OP_FSET_F32    82
+#define NV_OP_TXG         83
 
-#define NV_OP_COUNT     83
+#define NV_OP_COUNT     84
 
 /* nv50 files omitted */
 #define NV_FILE_GPR      0
@@ -380,9 +382,11 @@ struct nv_instruction {
    unsigned flat       : 1;
    unsigned patch      : 1;
    unsigned lanes      : 4; /* 3rd byte */
-   unsigned tex_argc   : 3;
+   unsigned tex_dim    : 2;
+   unsigned tex_array  : 1;
+   unsigned tex_cube   : 1;
+   unsigned tex_shadow : 1; /* 4th byte */
    unsigned tex_live   : 1;
-   unsigned tex_cube   : 1; /* 4th byte */
    unsigned tex_mask   : 4;
 
    uint8_t quadop;
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
index cd1ad03b00..2f99d5a339 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
@@ -391,23 +391,37 @@ emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
 static void
 emit_tex(struct nv_pc *pc, struct nv_instruction *i)
 {
+   int src1 = i->tex_array + i->tex_dim + i->tex_cube;
+
    pc->emit[0] = 0x00000086;
    pc->emit[1] = 0x80000000;
 
-   if (i->opcode == NV_OP_TXB) pc->emit[1] |= 0x04000000;
-   else
-   if (i->opcode == NV_OP_TXL) pc->emit[1] |= 0x06000000;
+   switch (i->opcode) {
+   case NV_OP_TEX: pc->emit[1] = 0x80000000; break;
+   case NV_OP_TXB: pc->emit[1] = 0x84000000; break;
+   case NV_OP_TXL: pc->emit[1] = 0x86000000; break;
+   case NV_OP_TXF: pc->emit[1] = 0x90000000; break;
+   case NV_OP_TXG: pc->emit[1] = 0xe0000000; break;
+   default:
+      assert(0);
+      break;
+   }
 
-   set_pred(pc, i);
+   if (i->tex_array)
+      pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */
+   if (i->tex_shadow)
+      pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */
 
-   if (1)
-      pc->emit[0] |= 63 << 26; /* explicit derivatives */
+   set_pred(pc, i);
 
    DID(pc, i->def[0], 14);
    SID(pc, i->src[0], 20);
+   SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */
 
    pc->emit[1] |= i->tex_mask << 14;
-   pc->emit[1] |= (i->tex_argc - 1) << 20;
+   pc->emit[1] |= (i->tex_dim - 1) << 20;
+   if (i->tex_cube)
+      pc->emit[1] |= 3 << 20;
 
    assert(i->ext.tex.s < 16);
 
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
index 9eac5ad900..6249f1fd1c 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_print.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c
@@ -371,5 +371,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
 
    { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
 
+   { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+
    { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }
 };
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
index 6f9d5de197..d24f09a150 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
@@ -492,6 +492,8 @@ pass_join_values(struct nv_pc_pass *ctx, int iter)
       case NV_OP_TXB:
       case NV_OP_TXL:
       case NV_OP_TXQ:
+         /* on nvc0, TEX src and dst can differ */
+         break;
       case NV_OP_BIND:
          if (iter)
             break;
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
index d0c8275489..fecfc76fb7 100644
--- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
@@ -1156,30 +1156,59 @@ bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
 }
 
 static INLINE void
-get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg)
+describe_texture_target(unsigned target, int *dim,
+                        int *array, int *cube, int *shadow)
 {
-   switch (insn->Texture.Texture) {
+   *array = *cube = *shadow = 0;
+
+   switch (target) {
    case TGSI_TEXTURE_1D:
-      *arg = *dim = 1;
+      *dim = 1;
       break;
    case TGSI_TEXTURE_SHADOW1D:
-      *dim = 1;
-      *arg = 2;
+      *dim = *shadow = 1;
       break;
    case TGSI_TEXTURE_UNKNOWN:
    case TGSI_TEXTURE_2D:
    case TGSI_TEXTURE_RECT:
-      *arg = *dim = 2;
+      *dim = 2;
       break;
    case TGSI_TEXTURE_SHADOW2D:
    case TGSI_TEXTURE_SHADOWRECT:
       *dim = 2;
-      *arg = 3;
+      *shadow = 1;
       break;
    case TGSI_TEXTURE_3D:
+      *dim = 3;
+      break;
    case TGSI_TEXTURE_CUBE:
-      *dim = *arg = 3;
+      *dim = 2;
+      *cube = 1;
+      break;
+      /*
+   case TGSI_TEXTURE_CUBE_ARRAY:
+      *dim = 2;
+      *cube = *array = 1;
       break;
+   case TGSI_TEXTURE_1D_ARRAY:
+      *dim = *array = 1;
+      break;
+   case TGSI_TEXTURE_2D_ARRAY:
+      *dim = 2;
+      *array = 1;
+      break;
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+      *dim = *array = *shadow = 1;
+      break;
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+      *dim = 2;
+      *array = *shadow = 1;
+      break;
+   case TGSI_TEXTURE_CUBE_ARRAY:
+      *dim = 2;
+      *array = *cube = 1;
+      break;
+      */
    default:
       assert(0);
       break;
@@ -1215,13 +1244,13 @@ bld_clone(struct bld_context *bld, struct nv_instruction *nvi)
 /* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */
 static void
 load_proj_tex_coords(struct bld_context *bld,
-                     struct nv_value *t[4], int dim, int arg,
+                     struct nv_value *t[4], int dim, int shadow,
                      const struct tgsi_full_instruction *insn)
 {
    int c;
    unsigned mask = (1 << dim) - 1;
 
-   if (arg != dim)
+   if (shadow)
       mask |= 4; /* depth comparison value */
 
    t[3] = emit_fetch(bld, insn, 0, 3);
@@ -1279,33 +1308,68 @@ bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
    return val;
 }
 
+/* order of TGSI operands: x y z layer shadow lod/bias */
+/* order of native operands: layer x y z | lod/bias shadow */
 static struct nv_instruction *
-emit_tex(struct bld_context *bld, uint opcode,
-         struct nv_value *dst[4], struct nv_value *t_in[4],
-         int argc, int tic, int tsc, int cube)
+emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
+         struct nv_value *dst[4], struct nv_value *arg[4],
+         int dim, int array, int cube, int shadow)
 {
-   struct nv_value *t[4];
-   struct nv_instruction *nvi;
+   struct nv_value *src[4];
+   struct nv_instruction *nvi, *bnd;
    int c;
+   int s = 0;
+   boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
+
+   if (array)
+      arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
+
+   /* ensure that all inputs reside in a GPR */
+   for (c = 0; c < dim + array + cube + shadow; ++c)
+      (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1;
+
+   /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
+
+   bnd = new_instruction(bld->pc, NV_OP_BIND);
+   if (array) {
+      src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+      bld_def(bnd, s, src[s]);
+      nv_reference(bld->pc, bnd, s++, arg[dim + cube]);
+   }
+   for (c = 0; c < dim + cube; ++c, ++s) {
+      src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4));
+      nv_reference(bld->pc, bnd, s, arg[c]);
+   }
+
+   if (shadow || lodbias) {
+      bnd = new_instruction(bld->pc, NV_OP_BIND);
 
-   /* the inputs to a tex instruction must be separate values */
-   for (c = 0; c < argc; ++c) {
-      t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]);
-      t[c]->insn->fixed = 1;
+      if (lodbias) {
+         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+         bld_def(bnd, 0, src[s++]);
+         nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]);
+      }
+      if (shadow) {
+         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+         bld_def(bnd, lodbias, src[s++]);
+         nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]);
+      }
    }
 
    nvi = new_instruction(bld->pc, opcode);
    for (c = 0; c < 4; ++c)
       dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4));
-   for (c = 0; c < argc; ++c)
-      nv_reference(bld->pc, nvi, c, t[c]);
+   for (c = 0; c < s; ++c)
+      nv_reference(bld->pc, nvi, c, src[c]);
 
    nvi->ext.tex.t = tic;
    nvi->ext.tex.s = tsc;
    nvi->tex_mask = 0xf;
    nvi->tex_cube = cube;
+   nvi->tex_dim = dim;
+   nvi->tex_cube = cube;
+   nvi->tex_shadow = shadow;
    nvi->tex_live = 0;
-   nvi->tex_argc = argc;
 
    return nvi;
 }
@@ -1326,24 +1390,25 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
 {
    struct nv_value *t[4], *s[3];
    uint opcode = translate_opcode(insn->Instruction.Opcode);
-   int arg, dim, c;
+   int c, dim, array, cube, shadow;
+   const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
    const int tic = insn->Src[1].Register.Index;
    const int tsc = tic;
-   const int cube = (insn->Texture.Texture  == TGSI_TEXTURE_CUBE) ? 1 : 0;
 
-   get_tex_dim(insn, &dim, &arg);
+   describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow);
+
+   assert(dim + array + shadow + lodbias <= 5);
 
    if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
-      load_proj_tex_coords(bld, t, dim, arg, insn);
+      load_proj_tex_coords(bld, t, dim, shadow, insn);
    else {
-      for (c = 0; c < dim; ++c)
+      for (c = 0; c < dim + cube + array; ++c)
          t[c] = emit_fetch(bld, insn, 0, c);
-      if (arg != dim)
-         t[dim] = emit_fetch(bld, insn, 0, 2);
+      if (shadow)
+         t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2));
    }
 
    if (cube) {
-      assert(dim >= 3);
       for (c = 0; c < 3; ++c)
          s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);
 
@@ -1355,9 +1420,10 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
          t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
    }
 
-   if (opcode == NV_OP_TXB || opcode == NV_OP_TXL)
-      t[arg++] = emit_fetch(bld, insn, 0, 3);
-   emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube);
+   if (lodbias)
+      t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3);
+
+   emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow);
 }
 
 static INLINE struct nv_value *
-- 
cgit v1.2.3