From 4c2247538394a313e1e90bfcd07c1ab9c7d41281 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 12 Nov 2010 15:17:40 +0100 Subject: nvc0: import nvc0 gallium driver --- src/gallium/drivers/nvc0/nvc0_pc_print.c | 375 +++++++++++++++++++++++++++++++ 1 file changed, 375 insertions(+) create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_print.c (limited to 'src/gallium/drivers/nvc0/nvc0_pc_print.c') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c new file mode 100644 index 0000000000..9eac5ad900 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -0,0 +1,375 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_pc.h" + +#define PRINT(args...) 
debug_printf(args) + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#endif + +static const char *norm = "\x1b[00m"; +static const char *gree = "\x1b[32m"; +static const char *blue = "\x1b[34m"; +static const char *cyan = "\x1b[36m"; +static const char *yllw = "\x1b[33m"; +static const char *mgta = "\x1b[35m"; + +static const char *nv_cond_names[] = +{ + "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "", + "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "", + "o", "c", "a", "s" +}; + +static const char *nv_modifier_strings[] = /* indexed by the source modifier value; nv_modifier_string() clamps the index to 9 ("BAD_MOD"), so the table must hold exactly 10 entries */ +{ + "", + "neg", + "abs", + "neg abs", + "not", + "not neg", + "not abs", + "not neg abs", + "sat", + "BAD_MOD" +}; + +const char * +nvc0_opcode_name(uint opcode) +{ + return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name; +} + +static INLINE const char * +nv_type_name(ubyte type, ubyte size) +{ + switch (type) { + case NV_TYPE_U16: return "u16"; + case NV_TYPE_S16: return "s16"; + case NV_TYPE_F32: return "f32"; + case NV_TYPE_U32: return "u32"; + case NV_TYPE_S32: return "s32"; + case NV_TYPE_P32: return "p32"; + case NV_TYPE_F64: return "f64"; + case NV_TYPE_ANY: + { + switch (size) { + case 1: return "b8"; + case 2: return "b16"; + case 4: return "b32"; + case 8: return "b64"; + case 12: return "b96"; + case 16: return "b128"; + default: + return "BAD_SIZE"; + } + } + default: + return "BAD_TYPE"; + } +} + +static INLINE const char * +nv_cond_name(ubyte cc) +{ + return nv_cond_names[MIN2(cc, 19)]; +} + +static INLINE const char * +nv_modifier_string(ubyte mod) +{ + return nv_modifier_strings[MIN2(mod, 9)]; +} + +static INLINE int +nv_value_id(struct nv_value *value) +{ + if (value->join->reg.id >= 0) + return value->join->reg.id; + return value->n; +} + +static INLINE boolean +nv_value_allocated(struct nv_value *value) +{ + return (value->reg.id >= 0) ?
TRUE : FALSE; +} + +static INLINE void +nv_print_address(const char c, int buf, struct nv_value *a, int offset) +{ + const char ac = (a && nv_value_allocated(a)) ? '$' : '%'; + char sg; + + if (offset < 0) { + sg = '-'; + offset = -offset; + } else { + sg = '+'; + } + + if (buf >= 0) + PRINT(" %s%c%i[", cyan, c, buf); + else + PRINT(" %s%c[", cyan, c); + if (a) + PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg); + PRINT("%s0x%x%s]", yllw, offset, cyan); +} + +static INLINE void +nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type) +{ + char reg_pfx = nv_value_allocated(value->join) ? '$' : '%'; + + if (value->reg.file != NV_FILE_PRED) + PRINT(" %s%s", gree, nv_type_name(type, value->reg.size)); + + switch (value->reg.file) { + case NV_FILE_GPR: + PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value)); + if (value->reg.size == 8) + PRINT("d"); + if (value->reg.size == 16) + PRINT("q"); + break; + case NV_FILE_PRED: + PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value)); + break; + case NV_FILE_COND: + PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value)); + break; + case NV_FILE_MEM_L: + nv_print_address('l', -1, indir, value->reg.address); + break; + case NV_FILE_MEM_G: + nv_print_address('g', -1, indir, value->reg.address); + break; + case NV_FILE_MEM_A: + nv_print_address('a', -1, indir, value->reg.address); + break; + case NV_FILE_MEM_V: + nv_print_address('v', -1, indir, value->reg.address); + break; + case NV_FILE_IMM: + switch (type) { + case NV_TYPE_U16: + case NV_TYPE_S16: + PRINT(" %s0x%04x", yllw, value->reg.imm.u32); + break; + case NV_TYPE_F32: + PRINT(" %s%f", yllw, value->reg.imm.f32); + break; + case NV_TYPE_F64: + PRINT(" %s%f", yllw, value->reg.imm.f64); + break; + case NV_TYPE_U32: + case NV_TYPE_S32: + case NV_TYPE_P32: + case NV_TYPE_ANY: + PRINT(" %s0x%08x", yllw, value->reg.imm.u32); + break; + } + break; + default: + if (value->reg.file >= NV_FILE_MEM_C(0) && + value->reg.file <= NV_FILE_MEM_C(15)) + 
nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir, + value->reg.address); + else + NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value)); + break; + } +} + +static INLINE void +nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type) +{ + nv_print_value(ref->value, indir, type); +} + +void +nvc0_print_instruction(struct nv_instruction *i) +{ + int s; + + PRINT("%i: ", i->serial); + + if (i->predicate >= 0) { + PRINT("%s%s", gree, i->cc ? "fl" : "tr"); + nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8); + PRINT(" "); + } + + PRINT("%s", gree); + if (NV_BASEOP(i->opcode) == NV_OP_SET) + PRINT("set %s", nv_cond_name(i->set_cond)); + else + if (i->saturate) + PRINT("sat %s", nvc0_opcode_name(i->opcode)); + else + PRINT("%s", nvc0_opcode_name(i->opcode)); + + if (i->opcode == NV_OP_CVT) + nv_print_value(i->def[0], NULL, i->ext.cvt.d); + else + if (i->def[0]) + nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode)); + else + if (i->target) + PRINT(" %s(BB:%i)", yllw, i->target->id); + else + PRINT(" #"); + + for (s = 1; s < 4 && i->def[s]; ++s) + nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode)); + if (s > 1) + PRINT("%s ,", norm); + + for (s = 0; s < 6 && i->src[s]; ++s) { + ubyte type; + if (s == i->indirect || s == i->predicate) + continue; + if (i->opcode == NV_OP_CVT) + type = i->ext.cvt.s; + else + type = NV_OPTYPE(i->opcode); + + if (i->src[s]->mod) + PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod)); + + if (i->indirect >= 0 && + NV_IS_MEMORY_FILE(i->src[s]->value->reg.file)) + nv_print_ref(i->src[s], i->src[i->indirect]->value, type); + else + nv_print_ref(i->src[s], NULL, type); + } + PRINT(" %s\n", norm); +} + +#define NV_MOD_SGN NV_MOD_ABS | NV_MOD_NEG + +struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = +{ + { NV_OP_UNDEF, "undef", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, + { NV_OP_BIND, "bind", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, + { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 
1, 0, 1, 0, 0 }, + { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, + { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, + { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, + + { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 }, + { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 1, 2 }, + { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 }, + { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 }, + { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 }, + { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 }, + { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, + + { NV_OP_VFETCH, "vfetch", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_PFETCH, "pfetch", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EXPORT, "export", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, + {
NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EMIT, "emit", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_TEX, "tex", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXB, "texbias", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXL, "texlod", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, + + { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_DFDX, "dfdx", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_DFDY, "dfdy", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_KIL, "kil", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_BRA, "bra", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RET, "ret", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RET, "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_JOIN, "join", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", 
NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET_F32_OR, "or set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, + + { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + + { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 } +}; -- cgit v1.2.3 From 608b3c4432f7b7b0c27fc22369e09c8b7d8cfc03 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 19 Dec 2010 21:49:32 +0100 Subject: nvc0: improve shader support for texturing Fixed shadow and cube texture fetches, add array texture fetches. 
--- src/gallium/drivers/nvc0/nvc0_pc.c | 2 +- src/gallium/drivers/nvc0/nvc0_pc.h | 12 ++- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 28 ++++-- src/gallium/drivers/nvc0/nvc0_pc_print.c | 2 + src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 2 + src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 132 +++++++++++++++++++++------- 6 files changed, 133 insertions(+), 45 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_pc_print.c') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index cf7b8e347f..72483f120e 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -397,7 +397,7 @@ nvc0_generate_code(struct nvc0_translation_info *ti) if (ret) goto out; #if NOUVEAU_DEBUG > 1 - nv_print_program(pc); + nvc0_print_program(pc); nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0); #endif diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index df0314965a..74867f02e7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -52,7 +52,8 @@ #define NV_OP_NOP 5 /** - * BIND forces source operand i into the same register as destination operand i + * BIND forces source operand i into the same register as destination operand i, + * and the operands will be assigned consecutive registers (needed for TEX) * SELECT forces its multiple source operands and its destination operand into * one and the same register. 
*/ @@ -152,8 +153,9 @@ #define NV_OP_SUB_S32 81 #define NV_OP_MAD_F32 NV_OP_MAD #define NV_OP_FSET_F32 82 +#define NV_OP_TXG 83 -#define NV_OP_COUNT 83 +#define NV_OP_COUNT 84 /* nv50 files omitted */ #define NV_FILE_GPR 0 @@ -380,9 +382,11 @@ struct nv_instruction { unsigned flat : 1; unsigned patch : 1; unsigned lanes : 4; /* 3rd byte */ - unsigned tex_argc : 3; + unsigned tex_dim : 2; + unsigned tex_array : 1; + unsigned tex_cube : 1; + unsigned tex_shadow : 1; /* 4th byte */ unsigned tex_live : 1; - unsigned tex_cube : 1; /* 4th byte */ unsigned tex_mask : 4; uint8_t quadop; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index cd1ad03b00..2f99d5a339 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -391,23 +391,37 @@ emit_minmax(struct nv_pc *pc, struct nv_instruction *i) static void emit_tex(struct nv_pc *pc, struct nv_instruction *i) { + int src1 = i->tex_array + i->tex_dim + i->tex_cube; + pc->emit[0] = 0x00000086; pc->emit[1] = 0x80000000; - if (i->opcode == NV_OP_TXB) pc->emit[1] |= 0x04000000; - else - if (i->opcode == NV_OP_TXL) pc->emit[1] |= 0x06000000; + switch (i->opcode) { + case NV_OP_TEX: pc->emit[1] = 0x80000000; break; + case NV_OP_TXB: pc->emit[1] = 0x84000000; break; + case NV_OP_TXL: pc->emit[1] = 0x86000000; break; + case NV_OP_TXF: pc->emit[1] = 0x90000000; break; + case NV_OP_TXG: pc->emit[1] = 0xe0000000; break; + default: + assert(0); + break; + } - set_pred(pc, i); + if (i->tex_array) + pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */ + if (i->tex_shadow) + pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */ - if (1) - pc->emit[0] |= 63 << 26; /* explicit derivatives */ + set_pred(pc, i); DID(pc, i->def[0], 14); SID(pc, i->src[0], 20); + SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */ pc->emit[1] |= i->tex_mask << 14; - pc->emit[1] |= (i->tex_argc - 1) << 20; + pc->emit[1] |= (i->tex_dim - 
1) << 20; + if (i->tex_cube) + pc->emit[1] |= 3 << 20; assert(i->ext.tex.s < 16); diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 9eac5ad900..6249f1fd1c 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -371,5 +371,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 } }; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index 6f9d5de197..d24f09a150 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -492,6 +492,8 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) case NV_OP_TXB: case NV_OP_TXL: case NV_OP_TXQ: + /* on nvc0, TEX src and dst can differ */ + break; case NV_OP_BIND: if (iter) break; diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index d0c8275489..fecfc76fb7 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1156,30 +1156,59 @@ bld_lit(struct bld_context *bld, struct nv_value *dst0[4], } static INLINE void -get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg) +describe_texture_target(unsigned target, int *dim, + int *array, int *cube, int *shadow) { - switch (insn->Texture.Texture) { + *array = *cube = *shadow = 0; + + switch (target) { case TGSI_TEXTURE_1D: - *arg = *dim = 1; + *dim = 1; break; case TGSI_TEXTURE_SHADOW1D: - *dim = 1; - *arg = 2; + *dim = *shadow = 1; break; case TGSI_TEXTURE_UNKNOWN: case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: - *arg = *dim = 2; + *dim = 2; break; case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: *dim = 2; - *arg = 3; + *shadow = 1; break; case 
TGSI_TEXTURE_3D: + *dim = 3; + break; case TGSI_TEXTURE_CUBE: - *dim = *arg = 3; + *dim = 2; + *cube = 1; + break; + /* + case TGSI_TEXTURE_CUBE_ARRAY: + *dim = 2; + *cube = *array = 1; break; + case TGSI_TEXTURE_1D_ARRAY: + *dim = *array = 1; + break; + case TGSI_TEXTURE_2D_ARRAY: + *dim = 2; + *array = 1; + break; + case TGSI_TEXTURE_SHADOW1D_ARRAY: + *dim = *array = *shadow = 1; + break; + case TGSI_TEXTURE_SHADOW2D_ARRAY: + *dim = 2; + *array = *shadow = 1; + break; + case TGSI_TEXTURE_CUBE_ARRAY: + *dim = 2; + *array = *cube = 1; + break; + */ default: assert(0); break; @@ -1215,13 +1244,13 @@ bld_clone(struct bld_context *bld, struct nv_instruction *nvi) /* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */ static void load_proj_tex_coords(struct bld_context *bld, - struct nv_value *t[4], int dim, int arg, + struct nv_value *t[4], int dim, int shadow, const struct tgsi_full_instruction *insn) { int c; unsigned mask = (1 << dim) - 1; - if (arg != dim) + if (shadow) mask |= 4; /* depth comparison value */ t[3] = emit_fetch(bld, insn, 0, 3); @@ -1279,33 +1308,68 @@ bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane, return val; } +/* order of TGSI operands: x y z layer shadow lod/bias */ +/* order of native operands: layer x y z | lod/bias shadow */ static struct nv_instruction * -emit_tex(struct bld_context *bld, uint opcode, - struct nv_value *dst[4], struct nv_value *t_in[4], - int argc, int tic, int tsc, int cube) +emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, + struct nv_value *dst[4], struct nv_value *arg[4], + int dim, int array, int cube, int shadow) { - struct nv_value *t[4]; - struct nv_instruction *nvi; + struct nv_value *src[4]; + struct nv_instruction *nvi, *bnd; int c; + int s = 0; + boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; + + if (array) + arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); + + /* ensure that all inputs reside in a GPR */ + for (c = 
0; c < dim + array + cube + shadow; ++c) + (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; + + /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ + + bnd = new_instruction(bld->pc, NV_OP_BIND); + if (array) { + src[s] = new_value(bld->pc, NV_FILE_GPR, 4); + bld_def(bnd, s, src[s]); + nv_reference(bld->pc, bnd, s++, arg[dim + cube]); + } + for (c = 0; c < dim + cube; ++c, ++s) { + src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4)); + nv_reference(bld->pc, bnd, s, arg[c]); + } + + if (shadow || lodbias) { + bnd = new_instruction(bld->pc, NV_OP_BIND); - /* the inputs to a tex instruction must be separate values */ - for (c = 0; c < argc; ++c) { - t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]); - t[c]->insn->fixed = 1; + if (lodbias) { + src[s] = new_value(bld->pc, NV_FILE_GPR, 4); + bld_def(bnd, 0, src[s++]); + nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]); + } + if (shadow) { + src[s] = new_value(bld->pc, NV_FILE_GPR, 4); + bld_def(bnd, lodbias, src[s++]); + nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]); + } } nvi = new_instruction(bld->pc, opcode); for (c = 0; c < 4; ++c) dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4)); - for (c = 0; c < argc; ++c) - nv_reference(bld->pc, nvi, c, t[c]); + for (c = 0; c < s; ++c) + nv_reference(bld->pc, nvi, c, src[c]); nvi->ext.tex.t = tic; nvi->ext.tex.s = tsc; nvi->tex_mask = 0xf; nvi->tex_cube = cube; + nvi->tex_dim = dim; + nvi->tex_array = array; /* consumed by emit_tex() in nvc0_pc_emit.c (layer-index flag and SRC1 position) */ + nvi->tex_shadow = shadow; nvi->tex_live = 0; - nvi->tex_argc = argc; return nvi; } @@ -1326,24 +1390,25 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], { struct nv_value *t[4], *s[3]; uint opcode = translate_opcode(insn->Instruction.Opcode); - int arg, dim, c; + int c, dim, array, cube, shadow; + const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; const int tic = insn->Src[1].Register.Index; const int tsc = tic; - const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ?
1 : 0; - get_tex_dim(insn, &dim, &arg); + describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow); + + assert(dim + array + shadow + lodbias <= 5); if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) - load_proj_tex_coords(bld, t, dim, arg, insn); + load_proj_tex_coords(bld, t, dim, shadow, insn); else { - for (c = 0; c < dim; ++c) + for (c = 0; c < dim + cube + array; ++c) t[c] = emit_fetch(bld, insn, 0, c); - if (arg != dim) - t[dim] = emit_fetch(bld, insn, 0, 2); + if (shadow) + t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2)); } if (cube) { - assert(dim >= 3); for (c = 0; c < 3; ++c) s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]); @@ -1355,9 +1420,10 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]); } - if (opcode == NV_OP_TXB || opcode == NV_OP_TXL) - t[arg++] = emit_fetch(bld, insn, 0, 3); - emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube); + if (lodbias) + t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3); + + emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow); } static INLINE struct nv_value * -- cgit v1.2.3 From 4fa429c876806d5ad614de469dec76a54a55bb74 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 27 Dec 2010 20:59:53 +0100 Subject: nvc0: reenable some shader optimizations CSE and constants folding. 
--- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 96 ++++++++++++++++------------- src/gallium/drivers/nvc0/nvc0_pc_print.c | 18 +++--- 2 files changed, 63 insertions(+), 51 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_pc_print.c') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index e9e387632b..8aff875fc3 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -276,7 +276,7 @@ nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) for (s = 0; s < 3 && nvi->src[s]; ++s) { ld = nvi->src[s]->value->insn; - if (!ld || ld->opcode != NV_OP_LD) + if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV)) continue; if (!nvc0_insn_can_load(nvi, s, ld)) continue; @@ -383,9 +383,8 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL) -/* static void -modifiers_apply(uint32_t *val, ubyte type, ubyte mod) +apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod) { if (mod & NV_MOD_ABS) { if (type == NV_TYPE_F32) @@ -400,10 +399,28 @@ modifiers_apply(uint32_t *val, ubyte type, ubyte mod) else *val = ~(*val) + 1; } + if (mod & NV_MOD_SAT) { + union { + float f; + uint32_t u; + int32_t i; + } u; + u.u = *val; + if (type == NV_TYPE_F32) { + u.f = CLAMP(u.f, -1.0f, 1.0f); + } else + if (type == NV_TYPE_U16) { + u.u = MIN2(u.u, 0xffff); + } else + if (type == NV_TYPE_S16) { + u.i = CLAMP(u.i, -32768, 32767); + } + *val = u.u; + } + if (mod & NV_MOD_NOT) + *val = ~*val; } -*/ -#if 0 static void constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, struct nv_value *src0, struct nv_value *src1) @@ -424,8 +441,8 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, u0.u32 = src0->reg.imm.u32; u1.u32 = src1->reg.imm.u32; - modifiers_apply(&u0.u32, type, nvi->src[0]->mod); - modifiers_apply(&u1.u32, type, nvi->src[1]->mod); + 
apply_modifiers(&u0.u32, type, nvi->src[0]->mod); + apply_modifiers(&u1.u32, type, nvi->src[1]->mod); switch (nvi->opcode) { case NV_OP_MAD: @@ -468,14 +485,14 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, nvi->opcode = NV_OP_MOV; - val = new_value(pc, NV_FILE_IMM, type); - + val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type)); val->reg.imm.u32 = u.u32; nv_reference(pc, nvi, 1, NULL); nv_reference(pc, nvi, 0, val); - if (nvi->src[2]) { /* from MAD */ + if (nvi->src[2]) { + /* from MAD */ nvi->src[1] = nvi->src[0]; nvi->src[0] = nvi->src[2]; nvi->src[2] = NULL; @@ -506,7 +523,7 @@ constant_operand(struct nv_pc *pc, type = NV_OPTYPE(nvi->opcode); u.u32 = val->reg.imm.u32; - modifiers_apply(&u.u32, type, nvi->src[s]->mod); + apply_modifiers(&u.u32, type, nvi->src[s]->mod); switch (NV_BASEOP(nvi->opcode)) { case NV_OP_MUL: @@ -576,23 +593,22 @@ constant_operand(struct nv_pc *pc, break; } } -#endif static int nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) { -#if 0 struct nv_instruction *nvi, *next; int j; for (nvi = b->entry; nvi; nvi = next) { struct nv_value *src0, *src1, *src; - int mod; + int s; + uint8_t mod[4]; next = nvi->next; - src0 = nvcg_find_immediate(nvi->src[0]); - src1 = nvcg_find_immediate(nvi->src[1]); + src0 = nvc0_pc_find_immediate(nvi->src[0]); + src1 = nvc0_pc_find_immediate(nvi->src[1]); if (src0 && src1) constant_expression(ctx->pc, nvi, src0, src1); @@ -604,7 +620,7 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) constant_operand(ctx->pc, nvi, src1, 1); } - /* try to combine MUL, ADD into MAD */ + /* check if we can MUL + ADD -> MAD/FMA */ if (nvi->opcode != NV_OP_ADD) continue; @@ -622,20 +638,27 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) /* could have an immediate from above constant_* */ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) continue; + s = (src == src0) ? 
0 : 1; + + mod[0] = nvi->src[0]->mod; + mod[1] = nvi->src[1]->mod; + mod[2] = src->insn->src[0]->mod; + mod[3] = src->insn->src[1]->mod; /* modifier of the second source of src->insn; it is copied to nvi->src[1] below */ + + if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) + continue; nvi->opcode = NV_OP_MAD; - mod = nvi->src[(src == src0) ? 0 : 1]->mod; - nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL); - nvi->src[2] = nvi->src[(src == src0) ? 1 : 0]; + nv_reference(ctx->pc, nvi, s, NULL); + nvi->src[2] = nvi->src[!s]; - assert(!(mod & ~NV_MOD_NEG)); nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); - nvi->src[0]->mod = src->insn->src[0]->mod ^ mod; - nvi->src[1]->mod = src->insn->src[1]->mod; + nvi->src[0]->mod = mod[2] ^ mod[s]; + nvi->src[1]->mod = mod[3]; } DESCEND_ARBITRARY(j, nv_pass_lower_arith); -#endif + return 0; } @@ -1016,7 +1039,6 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) static int nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) { -#if 0 struct nv_instruction *ir, *ik, *next; struct nv_instruction *entry = b->phi ?
b->phi : b->entry; int s; @@ -1030,23 +1052,13 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) if (ir->opcode != ik->opcode || ir->fixed) continue; - if (!ir->def[0] || !ik->def[0] || - ik->opcode == NV_OP_LDA || - ik->opcode == NV_OP_STA || - ik->opcode == NV_OP_MOV || - nv_is_vector_op(ik->opcode)) - continue; /* ignore loads, stores & moves */ - - if (ik->src[4] || ir->src[4]) - continue; /* don't mess with address registers */ + if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1]) + continue; - if (ik->flags_src || ir->flags_src || - ik->flags_def || ir->flags_def) - continue; /* and also not with flags, for now */ + if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) + continue; - if (ik->def[0]->reg.file == NV_FILE_OUT || - ir->def[0]->reg.file == NV_FILE_OUT || - !values_equal(ik->def[0], ir->def[0])) + if (!values_equal(ik->def[0], ir->def[0])) continue; for (s = 0; s < 3; ++s) { @@ -1071,7 +1083,7 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) if (s == 3) { nvc0_insn_delete(ir); ++reps; - nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]); + nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]); break; } } @@ -1079,7 +1091,7 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) } while(reps); DESCEND_ARBITRARY(s, nv_pass_cse); -#endif + return 0; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 6249f1fd1c..b03826484e 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -283,21 +283,21 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 }, - { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 }, - { NV_OP_XOR, "xor", NV_TYPE_U32, 
NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 }, - { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 1, 2 }, - { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 }, - { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 }, + { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 }, - { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 }, + { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, -- cgit v1.2.3