From 4c2247538394a313e1e90bfcd07c1ab9c7d41281 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 12 Nov 2010 15:17:40 +0100 Subject: nvc0: import nvc0 gallium driver --- src/gallium/drivers/nvc0/nvc0_pc.h | 648 +++++++++++++++++++++++++++++++++++++ 1 file changed, 648 insertions(+) create mode 100644 src/gallium/drivers/nvc0/nvc0_pc.h (limited to 'src/gallium/drivers/nvc0/nvc0_pc.h') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h new file mode 100644 index 0000000000..b48b0b1fba --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -0,0 +1,648 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NVC0_COMPILER_H__ +#define __NVC0_COMPILER_H__ + +#include + +#ifndef NOUVEAU_DBG +#ifdef NOUVEAU_DEBUG +# define NOUVEAU_DBG(args...) debug_printf(args); +#else +# define NOUVEAU_DBG(args...) +#endif +#endif + +#ifndef NOUVEAU_ERR +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); +#endif + +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +/* pseudo opcodes */ +#define NV_OP_UNDEF 0 +#define NV_OP_BIND 1 +#define NV_OP_MERGE 2 +#define NV_OP_PHI 3 +#define NV_OP_SELECT 4 +#define NV_OP_NOP 5 + +/** + * BIND forces source operand i into the same register as destination operand i + * SELECT forces its multiple source operands and its destination operand into + * one and the same register. + */ + +/* base opcodes */ +#define NV_OP_LD 6 +#define NV_OP_ST 7 +#define NV_OP_MOV 8 +#define NV_OP_AND 9 +#define NV_OP_OR 10 +#define NV_OP_XOR 11 +#define NV_OP_SHL 12 +#define NV_OP_SHR 13 +#define NV_OP_NOT 14 +#define NV_OP_SET 15 +#define NV_OP_ADD 16 +#define NV_OP_SUB 17 +#define NV_OP_MUL 18 +#define NV_OP_MAD 19 +#define NV_OP_ABS 20 +#define NV_OP_NEG 21 +#define NV_OP_MAX 22 +#define NV_OP_MIN 23 +#define NV_OP_CVT 24 +#define NV_OP_CEIL 25 +#define NV_OP_FLOOR 26 +#define NV_OP_TRUNC 27 +#define NV_OP_SAD 28 + +/* shader opcodes */ +#define NV_OP_VFETCH 29 +#define NV_OP_PFETCH 30 +#define NV_OP_EXPORT 31 +#define NV_OP_LINTERP 32 +#define NV_OP_PINTERP 33 +#define NV_OP_EMIT 34 +#define NV_OP_RESTART 35 +#define NV_OP_TEX 36 +#define NV_OP_TXB 37 +#define NV_OP_TXL 38 +#define NV_OP_TXF 39 +#define NV_OP_TXQ 40 +#define NV_OP_QUADOP 41 +#define NV_OP_DFDX 42 +#define NV_OP_DFDY 43 +#define NV_OP_KIL 44 + +/* control flow opcodes */ +#define NV_OP_BRA 45 +#define NV_OP_CALL 46 +#define NV_OP_RET 47 +#define NV_OP_EXIT 48 +#define NV_OP_BREAK 49 +#define NV_OP_BREAKADDR 50 +#define NV_OP_JOINAT 51 +#define NV_OP_JOIN 52 + +/* typed opcodes */ +#define NV_OP_ADD_F32 NV_OP_ADD +#define NV_OP_ADD_B32 53 +#define NV_OP_MUL_F32 NV_OP_MUL +#define NV_OP_MUL_B32 54 +#define NV_OP_ABS_F32 NV_OP_ABS +#define NV_OP_ABS_S32 55 +#define NV_OP_NEG_F32 NV_OP_NEG +#define NV_OP_NEG_S32 56 +#define NV_OP_MAX_F32 NV_OP_MAX +#define NV_OP_MAX_S32 57 +#define NV_OP_MAX_U32 58 +#define NV_OP_MIN_F32 NV_OP_MIN +#define NV_OP_MIN_S32 59 +#define NV_OP_MIN_U32 60 +#define NV_OP_SET_F32 61 +#define NV_OP_SET_S32 62 +#define NV_OP_SET_U32 63 +#define NV_OP_SAR 64 +#define NV_OP_RCP 65 +#define NV_OP_RSQ 66 +#define NV_OP_LG2 67 +#define NV_OP_SIN 68 +#define NV_OP_COS 69 +#define NV_OP_EX2 70 +#define NV_OP_PRESIN 71 +#define NV_OP_PREEX2 72 +#define NV_OP_SAT 73 + +/* newly added opcodes */ +#define NV_OP_SET_F32_AND 74 +#define NV_OP_SET_F32_OR 75 +#define NV_OP_SET_F32_XOR 76 +#define NV_OP_SELP 77 +#define NV_OP_SLCT 78 +#define NV_OP_SLCT_F32 NV_OP_SLCT +#define NV_OP_SLCT_S32 79 +#define NV_OP_SLCT_U32 80 +#define NV_OP_SUB_F32 NV_OP_SUB +#define NV_OP_SUB_S32 81 +#define NV_OP_MAD_F32 NV_OP_MAD +#define NV_OP_FSET_F32 82 + +#define NV_OP_COUNT 83 + +/* nv50 files omitted */ +#define NV_FILE_GPR 0 +#define NV_FILE_COND 1 +#define NV_FILE_PRED 2 +#define NV_FILE_IMM 16 +#define NV_FILE_MEM_S 32 +#define NV_FILE_MEM_V 34 +#define NV_FILE_MEM_A 35 +#define NV_FILE_MEM_L 48 +#define NV_FILE_MEM_G 64 +#define NV_FILE_MEM_C(i) (80 + i) + +#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S) + +#define NV_MOD_NEG 1 +#define NV_MOD_ABS 2 +#define NV_MOD_NOT 4 +#define NV_MOD_SAT 8 + +#define NV_TYPE_U8 0x00 +#define NV_TYPE_S8 0x01 +#define NV_TYPE_U16 0x02 +#define NV_TYPE_S16 0x03 +#define NV_TYPE_U32 0x04 +#define NV_TYPE_S32 0x05 +#define NV_TYPE_P32 0x07 +#define NV_TYPE_F32 0x09 +#define NV_TYPE_F64 0x0b +#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | (n << 4)) +#define NV_TYPE_ANY 0xff + +#define NV_TYPE_ISINT(t) ((t) < 7) +#define NV_TYPE_ISSGD(t) ((t) & 1) + +#define NV_CC_FL 0x0 +#define NV_CC_LT 0x1 +#define NV_CC_EQ 0x2 +#define NV_CC_LE 0x3 +#define NV_CC_GT 0x4 +#define NV_CC_NE 0x5 +#define NV_CC_GE 0x6 +#define NV_CC_U 0x8 +#define NV_CC_TR 0xf +#define NV_CC_O 0x10 +#define NV_CC_C 0x11 +#define NV_CC_A 0x12 +#define NV_CC_S 0x13 + +#define NV_PC_MAX_INSTRUCTIONS 2048 +#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) + +#define NV_PC_MAX_BASIC_BLOCKS 1024 + +struct nv_op_info { + uint base; /* e.g. ADD_S32 -> ADD */ + char name[12]; + uint8_t type; + uint8_t mods; + unsigned flow : 1; + unsigned commutative : 1; + unsigned vector : 1; + unsigned predicate : 1; + unsigned pseudo : 1; + unsigned immediate : 3; + unsigned memory : 3; +}; + +extern struct nv_op_info nvc0_op_info_table[]; + +#define NV_BASEOP(op) (nvc0_op_info_table[op].base) +#define NV_OPTYPE(op) (nvc0_op_info_table[op].type) + +static INLINE uint +nv_op_base(uint opcode) +{ + return nvc0_op_info_table[opcode].base; +} + +static INLINE boolean +nv_is_texture_op(uint opcode) +{ + return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ); +} + +static INLINE boolean +nv_is_vector_op(uint opcode) +{ + return nvc0_op_info_table[opcode].vector ? TRUE : FALSE; +} + +static INLINE boolean +nv_op_commutative(uint opcode) +{ + return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE; +} + +static INLINE uint8_t +nv_op_supported_src_mods(uint opcode) +{ + return nvc0_op_info_table[opcode].mods; +} + +static INLINE boolean +nv_op_predicateable(uint opcode) +{ + return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE; +} + +static INLINE uint +nv_type_order(ubyte type) +{ + switch (type & 0xf) { + case NV_TYPE_U8: + case NV_TYPE_S8: + return 0; + case NV_TYPE_U16: + case NV_TYPE_S16: + return 1; + case NV_TYPE_U32: + case NV_TYPE_F32: + case NV_TYPE_S32: + case NV_TYPE_P32: + return 2; + case NV_TYPE_F64: + return 3; + } + assert(0); + return 0; +} + +static INLINE uint +nv_type_sizeof(ubyte type) +{ + if (type & 0xf0) + return (1 << nv_type_order(type)) * (type >> 4); + return 1 << nv_type_order(type); +} + +static INLINE uint +nv_type_sizeof_base(ubyte type) +{ + return 1 << nv_type_order(type); +} + +struct nv_reg { + uint32_t address; /* for memory locations */ + int id; /* for registers */ + ubyte file; + ubyte size; + union { + int32_t s32; + int64_t s64; + uint64_t u64; + uint32_t u32; + float f32; + double f64; + } imm; +}; + +struct nv_range { + struct nv_range *next; + int bgn; + int end; +}; + +struct nv_ref; + +struct nv_value { + struct nv_reg reg; + struct nv_instruction *insn; + struct nv_value *join; + struct nv_ref *last_use; + int n; + struct nv_range *livei; + int refc; + struct nv_value *next; + struct nv_value *prev; +}; + +struct nv_ref { + struct nv_value *value; + struct nv_instruction *insn; + struct list_head list; /* connects uses of the same value */ + uint8_t mod; + uint8_t flags; +}; + +struct nv_basic_block; + +struct nv_instruction { + struct nv_instruction *next; + struct nv_instruction *prev; + uint opcode; + uint serial; + + struct nv_value *def[5]; + struct nv_ref *src[6]; + + int8_t predicate; /* index of predicate src */ + int8_t indirect; /* index of pointer src */ + + union { + struct { + uint8_t t; /* TIC binding */ + uint8_t s; /* TSC binding */ + } tex; + struct { + uint8_t d; /* output type */ + uint8_t s; /* input type */ + } cvt; + } ext; + + struct nv_basic_block *bb; + struct nv_basic_block *target; /* target block of control flow insn */ + + unsigned cc : 5; /* condition code */ + unsigned fixed : 1; /* don't optimize away (prematurely) */ + unsigned terminator : 1; + unsigned join : 1; + unsigned set_cond : 4; /* 2nd byte */ + unsigned saturate : 1; + unsigned centroid : 1; + unsigned flat : 1; + unsigned patch : 1; + unsigned lanes : 4; /* 3rd byte */ + unsigned tex_argc : 3; + unsigned tex_live : 1; + unsigned tex_cube : 1; /* 4th byte */ + unsigned tex_mask : 4; + + uint8_t quadop; +}; + +static INLINE int +nvi_vector_size(struct nv_instruction *nvi) +{ + int i; + assert(nvi); + for (i = 0; i < 5 && nvi->def[i]; ++i); + return i; +} + +#define CFG_EDGE_FORWARD 0 +#define CFG_EDGE_BACK 1 +#define CFG_EDGE_LOOP_ENTER 2 +#define CFG_EDGE_LOOP_LEAVE 4 +#define CFG_EDGE_FAKE 8 + +/* 'WALL' edge means where reachability check doesn't follow */ +/* 'LOOP' edge means just having to do with loops */ +#define IS_LOOP_EDGE(k) ((k) & 7) +#define IS_WALL_EDGE(k) ((k) & 9) + +struct nv_basic_block { + struct nv_instruction *entry; /* first non-phi instruction */ + struct nv_instruction *exit; + struct nv_instruction *phi; /* very first instruction */ + int num_instructions; + + struct nv_basic_block *out[2]; /* no indirect branches -> 2 */ + struct nv_basic_block *in[8]; /* hope that suffices */ + uint num_in; + ubyte out_kind[2]; + ubyte in_kind[8]; + + int id; + int subroutine; + uint priv; /* reset to 0 after you're done */ + uint pass_seq; + + uint32_t emit_pos; /* position, size in emitted code */ + uint32_t emit_size; + + uint32_t live_set[NV_PC_MAX_VALUES / 32]; +}; + +struct nvc0_translation_info; + +struct nv_pc { + struct nv_basic_block **root; + struct nv_basic_block *current_block; + struct nv_basic_block *parent_block; + + int loop_nesting_bound; + uint pass_seq; + + struct nv_value values[NV_PC_MAX_VALUES]; + struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; + struct nv_ref **refs; + struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS]; + int num_values; + int num_instructions; + int num_refs; + int num_blocks; + int num_subroutines; + + int max_reg[4]; + + uint32_t *immd_buf; /* populated on emit */ + unsigned immd_count; + + uint32_t *emit; + unsigned emit_size; + unsigned emit_pos; + + void *reloc_entries; + unsigned num_relocs; + + /* optimization enables */ + boolean opt_reload_elim; + boolean is_fragprog; +}; + +void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *); +void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *); + +static INLINE struct nv_instruction * +nv_alloc_instruction(struct nv_pc *pc, uint opcode) +{ + struct nv_instruction *insn; + + insn = &pc->instructions[pc->num_instructions++]; + assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); + + insn->opcode = opcode; + insn->cc = 0; + insn->indirect = -1; + insn->predicate = -1; + + return insn; +} + +static INLINE struct nv_instruction * +new_instruction(struct nv_pc *pc, uint opcode) +{ + struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); + + nvc0_insn_append(pc->current_block, insn); + return insn; +} + +static INLINE struct nv_instruction * +new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode) +{ + struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); + + nvc0_insn_insert_after(at, insn); + return insn; +} + +static INLINE struct nv_value * +new_value(struct nv_pc *pc, ubyte file, ubyte size) +{ + struct nv_value *value = &pc->values[pc->num_values]; + + assert(pc->num_values < NV_PC_MAX_VALUES - 1); + + value->n = pc->num_values++; + value->join = value; + value->reg.id = -1; + value->reg.file = file; + value->reg.size = size; + return value; +} + +static INLINE struct nv_value * +new_value_like(struct nv_pc *pc, struct nv_value *like) +{ + return new_value(pc, like->reg.file, like->reg.size); +} + +static INLINE struct nv_ref * +new_ref(struct nv_pc *pc, struct nv_value *val) +{ + int i; + struct nv_ref *ref; + + if ((pc->num_refs % 64) == 0) { + const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *); + const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *); + + pc->refs = REALLOC(pc->refs, old_size, new_size); + + ref = CALLOC(64, sizeof(struct nv_ref)); + for (i = 0; i < 64; ++i) + pc->refs[pc->num_refs + i] = &ref[i]; + } + + ref = pc->refs[pc->num_refs++]; + ref->value = val; + + LIST_INITHEAD(&ref->list); + + ++val->refc; + return ref; +} + +static INLINE struct nv_basic_block * +new_basic_block(struct nv_pc *pc) +{ + struct nv_basic_block *bb; + + if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS) + return NULL; + + bb = CALLOC_STRUCT(nv_basic_block); + + bb->id = pc->num_blocks; + pc->bb_list[pc->num_blocks++] = bb; + return bb; +} + +static INLINE void +nv_reference(struct nv_pc *pc, + struct nv_instruction *nvi, int c, struct nv_value *s) +{ + struct nv_ref **d = &nvi->src[c]; + assert(c < 6); + + if (*d) { + --(*d)->value->refc; + LIST_DEL(&(*d)->list); + } + + if (s) { + if (!*d) { + *d = new_ref(pc, s); + (*d)->insn = nvi; + } else { + LIST_DEL(&(*d)->list); + (*d)->value = s; + ++(s->refc); + } + if (!s->last_use) + s->last_use = *d; + else + LIST_ADDTAIL(&s->last_use->list, &(*d)->list); + + s->last_use = *d; + (*d)->insn = nvi; + } else { + *d = NULL; + } +} + +/* nvc0_emit.c */ +void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *); + +/* nvc0_print.c */ +const char *nvc0_opcode_name(uint opcode); +void nvc0_print_instruction(struct nv_instruction *); + +/* nvc0_pc.c */ +void nvc0_print_function(struct nv_basic_block *root); +void nvc0_print_program(struct nv_pc *); + +boolean nvc0_insn_can_load(struct nv_instruction *, int s, + struct nv_instruction *); +boolean nvc0_insn_is_predicateable(struct nv_instruction *); + +int nvc0_insn_refcount(struct nv_instruction *); +void nvc0_insn_delete(struct nv_instruction *); +void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *); + +void nvc0_bblock_attach(struct nv_basic_block *parent, + struct nv_basic_block *child, ubyte edge_kind); +boolean nvc0_bblock_dominated_by(struct nv_basic_block *, + struct nv_basic_block *); +boolean nvc0_bblock_reachable_by(struct nv_basic_block *future, + struct nv_basic_block *past, + struct nv_basic_block *final); +struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *); + +int nvc0_pc_replace_value(struct nv_pc *pc, + struct nv_value *old_val, + struct nv_value *new_val); + +struct nv_value *nvc0_pc_find_immediate(struct nv_ref *); +struct nv_value *nvc0_pc_find_constant(struct nv_ref *); + +typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b); + +void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *); + +int nvc0_pc_exec_pass0(struct nv_pc *pc); +int nvc0_pc_exec_pass1(struct nv_pc *pc); +int nvc0_pc_exec_pass2(struct nv_pc *pc); + +int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *); + +#endif // NV50_COMPILER_H -- cgit v1.2.3 From 51f22689a419a8a13ca105e8ffc905b5fadea0db Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 10 Dec 2010 20:13:40 +0100 Subject: nvc0: fix branching ops - bra is PC relative - jump to else condition was inverted - handle integer comparisons --- src/gallium/drivers/nvc0/nvc0_pc.c | 3 ++- src/gallium/drivers/nvc0/nvc0_pc.h | 6 +++--- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 28 ++++++++++++++++++++-------- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 2 +- 4 files changed, 26 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_pc.h') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index e38f6ced24..cf7b8e347f 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -328,6 +328,7 @@ nvc0_emit_program(struct nv_pc *pc) NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size); + pc->emit_pos = 0; for (n = 0; n < pc->num_blocks; ++n) { struct nv_instruction *i; struct nv_basic_block *b = pc->bb_list[n]; @@ -335,7 +336,7 @@ nvc0_emit_program(struct nv_pc *pc) for (i = b->entry; i; i = i->next) { nvc0_emit_instruction(pc, i); pc->emit += 2; - pc->emit_pos += 2; + pc->emit_pos += 8; } } assert(pc->emit == &code[pc->emit_size / 4]); diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index b48b0b1fba..df0314965a 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -425,7 +425,7 @@ struct nv_basic_block { uint priv; /* reset to 0 after you're done */ uint pass_seq; - uint32_t emit_pos; /* position, size in emitted code */ + uint32_t emit_pos; /* position, size in emitted code (in bytes) */ uint32_t emit_size; uint32_t live_set[NV_PC_MAX_VALUES / 32]; @@ -457,8 +457,8 @@ struct nv_pc { unsigned immd_count; uint32_t *emit; - unsigned emit_size; - unsigned emit_pos; + uint32_t emit_size; + uint32_t emit_pos; void *reloc_entries; unsigned num_relocs; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index 6735f93fd3..cd1ad03b00 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -73,7 +73,7 @@ create_fixup(struct nv_pc *pc, uint8_t ty, f = (struct nvc0_fixup *)pc->reloc_entries; - f[n].ofst = (pc->emit_pos + w) * 4; + f[n].ofst = pc->emit_pos + w * 4; f[n].type = ty; f[n].data = data; f[n].mask = m; @@ -217,19 +217,26 @@ const_space_index(struct nv_instruction *i, int s) static void emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) { - pc->emit[0] = 0x000001e7; + pc->emit[0] = 0x00000007; pc->emit[1] = op << 24; - set_pred(pc, i); + if (op == 0x40 || (op >= 0x80 && op <= 0x98)) { + /* bra, exit, ret or kil */ + pc->emit[0] |= 0x1e0; + set_pred(pc, i); + } if (i->target) { - uint32_t pos = i->target->emit_pos; + int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8); + /* we will need relocations only for global functions */ + /* create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000); create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff); + */ - pc->emit[0] |= (pos & 0x3f) << 26; - pc->emit[1] |= (pos >> 6) & 0x1ffff; + pc->emit[0] |= (pcrel & 0x3f) << 26; + pc->emit[1] |= (pcrel >> 6) & 0x1ffff; } } @@ -893,6 +900,11 @@ nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i) emit_mul_f32(pc, i); break; case NV_OP_SET_F32: + case NV_OP_SET_F32_AND: + case NV_OP_SET_F32_OR: + case NV_OP_SET_F32_XOR: + case NV_OP_SET_S32: + case NV_OP_SET_U32: case NV_OP_FSET_F32: emit_set(pc, i); break; @@ -926,8 +938,8 @@ nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i) break; case NV_OP_JOIN: case NV_OP_NOP: - pc->emit[0] = 0x00003c00; - pc->emit[1] = 0x00000000; + pc->emit[0] = 0x00003de4; + pc->emit[1] = 0x40000000; break; case NV_OP_SELP: emit_selp(pc, i); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index a6797db9c5..26f9e735fb 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1494,7 +1494,7 @@ bld_instruction(struct bld_context *bld, bld->join_bb[bld->cond_lvl] = bld->pc->current_block; bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; - src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE, + src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ, emit_fetch(bld, insn, 0, 0), bld->zero); bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); -- cgit v1.2.3 From 608b3c4432f7b7b0c27fc22369e09c8b7d8cfc03 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 19 Dec 2010 21:49:32 +0100 Subject: nvc0: improve shader support for texturing Fixed shadow and cube texture fetches, add array texture fetches. --- src/gallium/drivers/nvc0/nvc0_pc.c | 2 +- src/gallium/drivers/nvc0/nvc0_pc.h | 12 ++- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 28 ++++-- src/gallium/drivers/nvc0/nvc0_pc_print.c | 2 + src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 2 + src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 132 +++++++++++++++++++++------- 6 files changed, 133 insertions(+), 45 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_pc.h') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index cf7b8e347f..72483f120e 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -397,7 +397,7 @@ nvc0_generate_code(struct nvc0_translation_info *ti) if (ret) goto out; #if NOUVEAU_DEBUG > 1 - nv_print_program(pc); + nvc0_print_program(pc); nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0); #endif diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index df0314965a..74867f02e7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -52,7 +52,8 @@ #define NV_OP_NOP 5 /** - * BIND forces source operand i into the same register as destination operand i + * BIND forces source operand i into the same register as destination operand i, + * and the operands will be assigned consecutive registers (needed for TEX) * SELECT forces its multiple source operands and its destination operand into * one and the same register. */ @@ -152,8 +153,9 @@ #define NV_OP_SUB_S32 81 #define NV_OP_MAD_F32 NV_OP_MAD #define NV_OP_FSET_F32 82 +#define NV_OP_TXG 83 -#define NV_OP_COUNT 83 +#define NV_OP_COUNT 84 /* nv50 files omitted */ #define NV_FILE_GPR 0 @@ -380,9 +382,11 @@ struct nv_instruction { unsigned flat : 1; unsigned patch : 1; unsigned lanes : 4; /* 3rd byte */ - unsigned tex_argc : 3; + unsigned tex_dim : 2; + unsigned tex_array : 1; + unsigned tex_cube : 1; + unsigned tex_shadow : 1; /* 4th byte */ unsigned tex_live : 1; - unsigned tex_cube : 1; /* 4th byte */ unsigned tex_mask : 4; uint8_t quadop; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index cd1ad03b00..2f99d5a339 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -391,23 +391,37 @@ emit_minmax(struct nv_pc *pc, struct nv_instruction *i) static void emit_tex(struct nv_pc *pc, struct nv_instruction *i) { + int src1 = i->tex_array + i->tex_dim + i->tex_cube; + pc->emit[0] = 0x00000086; pc->emit[1] = 0x80000000; - if (i->opcode == NV_OP_TXB) pc->emit[1] |= 0x04000000; - else - if (i->opcode == NV_OP_TXL) pc->emit[1] |= 0x06000000; + switch (i->opcode) { + case NV_OP_TEX: pc->emit[1] = 0x80000000; break; + case NV_OP_TXB: pc->emit[1] = 0x84000000; break; + case NV_OP_TXL: pc->emit[1] = 0x86000000; break; + case NV_OP_TXF: pc->emit[1] = 0x90000000; break; + case NV_OP_TXG: pc->emit[1] = 0xe0000000; break; + default: + assert(0); + break; + } - set_pred(pc, i); + if (i->tex_array) + pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */ + if (i->tex_shadow) + pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */ - if (1) - pc->emit[0] |= 63 << 26; /* explicit derivatives */ + set_pred(pc, i); DID(pc, i->def[0], 14); SID(pc, i->src[0], 20); + SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */ pc->emit[1] |= i->tex_mask << 14; - pc->emit[1] |= (i->tex_argc - 1) << 20; + pc->emit[1] |= (i->tex_dim - 1) << 20; + if (i->tex_cube) + pc->emit[1] |= 3 << 20; assert(i->ext.tex.s < 16); diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 9eac5ad900..6249f1fd1c 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -371,5 +371,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 } }; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index 6f9d5de197..d24f09a150 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -492,6 +492,8 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) case NV_OP_TXB: case NV_OP_TXL: case NV_OP_TXQ: + /* on nvc0, TEX src and dst can differ */ + break; case NV_OP_BIND: if (iter) break; diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index d0c8275489..fecfc76fb7 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1156,30 +1156,59 @@ bld_lit(struct bld_context *bld, struct nv_value *dst0[4], } static INLINE void -get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg) +describe_texture_target(unsigned target, int *dim, + int *array, int *cube, int *shadow) { - switch (insn->Texture.Texture) { + *array = *cube = *shadow = 0; + + switch (target) { case TGSI_TEXTURE_1D: - *arg = *dim = 1; + *dim = 1; break; case TGSI_TEXTURE_SHADOW1D: - *dim = 1; - *arg = 2; + *dim = *shadow = 1; break; case TGSI_TEXTURE_UNKNOWN: case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: - *arg = *dim = 2; + *dim = 2; break; case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: *dim = 2; - *arg = 3; + *shadow = 1; break; case TGSI_TEXTURE_3D: + *dim = 3; + break; case TGSI_TEXTURE_CUBE: - *dim = *arg = 3; + *dim = 2; + *cube = 1; + break; + /* + case TGSI_TEXTURE_CUBE_ARRAY: + *dim = 2; + *cube = *array = 1; break; + case TGSI_TEXTURE_1D_ARRAY: + *dim = *array = 1; + break; + case TGSI_TEXTURE_2D_ARRAY: + *dim = 2; + *array = 1; + break; + case TGSI_TEXTURE_SHADOW1D_ARRAY: + *dim = *array = *shadow = 1; + break; + case TGSI_TEXTURE_SHADOW2D_ARRAY: + *dim = 2; + *array = *shadow = 1; + break; + case TGSI_TEXTURE_CUBE_ARRAY: + *dim = 2; + *array = *cube = 1; + break; + */ default: assert(0); break; @@ -1215,13 +1244,13 @@ bld_clone(struct bld_context *bld, struct nv_instruction *nvi) /* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */ static void load_proj_tex_coords(struct bld_context *bld, - struct nv_value *t[4], int dim, int arg, + struct nv_value *t[4], int dim, int shadow, const struct tgsi_full_instruction *insn) { int c; unsigned mask = (1 << dim) - 1; - if (arg != dim) + if (shadow) mask |= 4; /* depth comparison value */ t[3] = emit_fetch(bld, insn, 0, 3); @@ -1279,33 +1308,68 @@ bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane, return val; } +/* order of TGSI operands: x y z layer shadow lod/bias */ +/* order of native operands: layer x y z | lod/bias shadow */ static struct nv_instruction * -emit_tex(struct bld_context *bld, uint opcode, - struct nv_value *dst[4], struct nv_value *t_in[4], - int argc, int tic, int tsc, int cube) +emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, + struct nv_value *dst[4], struct nv_value *arg[4], + int dim, int array, int cube, int shadow) { - struct nv_value *t[4]; - struct nv_instruction *nvi; + struct nv_value *src[4]; + struct nv_instruction *nvi, *bnd; int c; + int s = 0; + boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; + + if (array) + arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); + + /* ensure that all inputs reside in a GPR */ + for (c = 0; c < dim + array + cube + shadow; ++c) + (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; + + /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ + + bnd = new_instruction(bld->pc, NV_OP_BIND); + if (array) { + src[s] = new_value(bld->pc, NV_FILE_GPR, 4); + bld_def(bnd, s, src[s]); + nv_reference(bld->pc, bnd, s++, arg[dim + cube]); + } + for (c = 0; c < dim + cube; ++c, ++s) { + src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4)); + nv_reference(bld->pc, bnd, s, arg[c]); + } + + if (shadow || lodbias) { + bnd = new_instruction(bld->pc, NV_OP_BIND); - /* the inputs to a tex instruction must be separate values */ - for (c = 0; c < argc; ++c) { - t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]); - t[c]->insn->fixed = 1; + if (lodbias) { + src[s] = new_value(bld->pc, NV_FILE_GPR, 4); + bld_def(bnd, 0, src[s++]); + nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]); + } + if (shadow) { + src[s] = new_value(bld->pc, NV_FILE_GPR, 4); + bld_def(bnd, lodbias, src[s++]); + nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]); + } } nvi = new_instruction(bld->pc, opcode); for (c = 0; c < 4; ++c) dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4)); - for (c = 0; c < argc; ++c) - nv_reference(bld->pc, nvi, c, t[c]); + for (c = 0; c < s; ++c) + nv_reference(bld->pc, nvi, c, src[c]); nvi->ext.tex.t = tic; nvi->ext.tex.s = tsc; nvi->tex_mask = 0xf; nvi->tex_cube = cube; + nvi->tex_dim = dim; + nvi->tex_cube = cube; + nvi->tex_shadow = shadow; nvi->tex_live = 0; - nvi->tex_argc = argc; return nvi; } @@ -1326,24 +1390,25 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], { struct nv_value *t[4], *s[3]; uint opcode = translate_opcode(insn->Instruction.Opcode); - int arg, dim, c; + int c, dim, array, cube, shadow; + const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; const int tic = insn->Src[1].Register.Index; const int tsc = tic; - const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0; - get_tex_dim(insn, &dim, &arg); + describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow); + + assert(dim + array + shadow + lodbias <= 5); if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) - load_proj_tex_coords(bld, t, dim, arg, insn); + load_proj_tex_coords(bld, t, dim, shadow, insn); else { - for (c = 0; c < dim; ++c) + for (c = 0; c < dim + cube + array; ++c) t[c] = emit_fetch(bld, insn, 0, c); - if (arg != dim) - t[dim] = emit_fetch(bld, insn, 0, 2); + if (shadow) + t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2)); } if (cube) { - assert(dim >= 3); for (c = 0; c < 3; ++c) s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]); @@ -1355,9 +1420,10 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]); } - if (opcode == NV_OP_TXB || opcode == NV_OP_TXL) - t[arg++] = emit_fetch(bld, insn, 0, 3); - emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube); + if (lodbias) + t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3); + + emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow); } static INLINE struct nv_value * -- cgit v1.2.3