From 633f5ac6124b1b57152c09becba92d176e905ae9 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 23 Jul 2010 21:21:25 +0200 Subject: nv50: import new compiler --- src/gallium/drivers/nv50/nv50_pc_emit.c | 1139 +++++++++++++++++++++++++++++++ 1 file changed, 1139 insertions(+) create mode 100644 src/gallium/drivers/nv50/nv50_pc_emit.c (limited to 'src/gallium/drivers/nv50/nv50_pc_emit.c') diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c new file mode 100644 index 0000000000..b917d23232 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_pc_emit.c @@ -0,0 +1,1139 @@ +/*************************************************************************/ +/* Copyright (C) 2009 */ +/* */ +/* This program is free software: you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation, either version 3 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +/*************************************************************************/ + +#include "nv50_context.h" +#include "nv50_pc.h" + +// Definitions + +#define FLAGS_CC_SHIFT 7 +#define FLAGS_ID_SHIFT 12 +#define FLAGS_WR_ID_SHIFT 4 +#define FLAGS_CC_MASK (0x1f << FLAGS_CC_SHIFT) +#define FLAGS_ID_MASK (0x03 << FLAGS_ID_SHIFT) +#define FLAGS_WR_EN (1 << 6) +#define FLAGS_WR_ID_MASK (0x3 << FLAGS_WR_ID_SHIFT) + +const ubyte nv50_inst_min_size_tab[NV_OP_COUNT] = +{ + 0, 0, 0, 8, 8, 4, 4, 4, 8, 4, 4, 8, 8, 8, 8, 8, /* 15 */ + 8, 8, 8, 4, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 31 */ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 47 */ + 4, 8, 8, 8, 8, 8, 0, 0 +}; + +/* XXX: silence, you ! */ +unsigned +nv50_inst_min_size(struct nv_instruction *i); + +unsigned +nv50_inst_min_size(struct nv_instruction *i) +{ + int n; + + if (nv50_inst_min_size_tab[i->opcode] > 4) + return 8; + + if (i->def[0] && i->def[0]->reg.file != NV_FILE_GPR) + return 8; + if (i->def[0]->join->reg.id > 63) + return 8; + + for (n = 0; n < 3; ++n) { + if (!i->src[n]) + break; + if (i->src[n]->value->reg.file != NV_FILE_GPR && + i->src[n]->value->reg.file != NV_FILE_MEM_V) + return 8; + if (i->src[n]->value->reg.id > 63) + return 8; + } + + if (i->flags_def || i->flags_src || i->src[4]) + return 8; + + if (i->src[2]) { + if (i->saturate || i->src[2]->mod) + return 8; + if (i->src[0]->mod ^ i->src[1]->mod) + return 8; + if ((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS) + return 8; + if (i->def[0]->join->reg.id < 0 || + i->def[0]->join->reg.id != i->src[2]->value->join->reg.id) + return 8; + } + + return nv50_inst_min_size_tab[i->opcode]; +} + +static INLINE ubyte +STYPE(struct nv_instruction *nvi, int s) +{ + return nvi->src[s]->typecast; +} + +static INLINE ubyte +DTYPE(struct nv_instruction *nvi, int d) +{ + return nvi->def[d]->reg.type; +} + +static INLINE struct nv_reg * +SREG(struct nv_ref *ref) +{ + return &ref->value->join->reg; +} + +static INLINE struct nv_reg * +DREG(struct nv_value 
*val) +{ + return &val->join->reg; +} + +static INLINE ubyte +SFILE(struct nv_instruction *nvi, int s) +{ + return nvi->src[s]->value->reg.file; +} + +static INLINE ubyte +DFILE(struct nv_instruction *nvi, int d) +{ + return nvi->def[0]->reg.file; +} + +static INLINE void +SID(struct nv_pc *pc, struct nv_ref *ref, int pos) +{ + pc->emit[pos / 32] |= SREG(ref)->id << (pos % 32); +} + +static INLINE void +DID(struct nv_pc *pc, struct nv_value *val, int pos) +{ + pc->emit[pos / 32] |= DREG(val)->id << (pos % 32); +} + +static INLINE uint32_t +get_immd_u32(struct nv_ref *ref) +{ + assert(ref->value->reg.file == NV_FILE_IMM); + return ref->value->reg.imm.u32; +} + +static INLINE void +set_immd_u32(struct nv_pc *pc, uint32_t u32) +{ + pc->emit[1] |= 3; + pc->emit[0] |= (u32 & 0x3f) << 16; + pc->emit[1] |= (u32 >> 6) << 2; +} + +static INLINE void +set_immd(struct nv_pc *pc, struct nv_ref *ref) +{ + assert(ref->value->reg.file == NV_FILE_IMM); + set_immd_u32(pc, get_immd_u32(ref)); +} + +static void +new_fixup(struct nv_pc *pc, unsigned type, uint32_t data, uint32_t m, int s) +{ + const unsigned size = sizeof(struct nv_fixup); + const unsigned n = pc->num_fixups; + return; + + if (!(n % 8)) + pc->fixups = REALLOC(pc->fixups, n * size, (n + 8) * size); + + pc->fixups[n].offset = pc->bin_pos + (s / 32); + pc->fixups[n].type = type; + pc->fixups[n].data = data; + pc->fixups[n].mask = m << (s % 32); + pc->fixups[n].shift = s % 32; + + ++pc->num_fixups; + + assert(((data << (s % 32)) & pc->fixups[n].mask) == (data << (s % 32))); +} + +static void +nv_pc_alloc_immd(struct nv_pc *pc, struct nv_ref *ref) +{ + uint32_t i, val = get_immd_u32(ref); + + for (i = 0; i < pc->immd_count; ++i) + if (pc->immd_buf[i] == val) + break; + + if (i == pc->immd_count) { + if (!(pc->immd_count % 8)) + pc->immd_buf = REALLOC(pc->immd_buf, + pc->immd_count * 4, (pc->immd_count + 8) * 4); + pc->immd_buf[pc->immd_count++] = val; + } + + SREG(ref)->id = i; +} + +static INLINE void +set_pred(struct 
nv_pc *pc, struct nv_instruction *i) +{ + assert(!(pc->emit[1] & 0x00003f80)); + + pc->emit[1] |= i->cc << 7; + if (i->flags_src) + pc->emit[1] |= SREG(i->flags_src)->id << 12; +} + +static INLINE void +set_pred_wr(struct nv_pc *pc, struct nv_instruction *i) +{ + assert(!(pc->emit[1] & 0x00000070)); + + if (i->flags_def) + pc->emit[1] |= (DREG(i->flags_def)->id << 4) | 0x40; +} + +static INLINE void +set_a16_bits(struct nv_pc *pc, uint id) +{ + ++id; /* $a0 is always 0 */ + pc->emit[0] |= (id & 3) << 26; + pc->emit[1] |= id & 4; +} + +static INLINE void +set_addr(struct nv_pc *pc, struct nv_instruction *i) +{ + if (i->src[4]) + set_a16_bits(pc, SREG(i->src[4])->id); +} + +static void +set_dst(struct nv_pc *pc, struct nv_value *value) +{ + struct nv_reg *reg = &value->join->reg; + + if (reg->id < 0) { + debug_printf("WARNING: unused dst, hope we can bucket it !\n"); + pc->emit[0] |= 127 << 2; + pc->emit[1] |= 0x8; + return; + } + + if (reg->file == NV_FILE_OUT) + pc->emit[1] |= 0x8; + else + if (reg->file == NV_FILE_ADDR) + assert(0); + + pc->emit[0] |= reg->id << 2; +} + +static void +set_src_0(struct nv_pc *pc, struct nv_ref *ref) +{ + struct nv_reg *reg = SREG(ref); + + if (reg->file == NV_FILE_MEM_S) + pc->emit[1] |= 0x00200000; + else + if (reg->file == NV_FILE_MEM_P) + pc->emit[0] |= 0x01800000; + else + if (reg->file != NV_FILE_GPR) + NOUVEAU_ERR("invalid src0 register file: %d\n", reg->file); + + assert(reg->id < 128); + pc->emit[0] |= reg->id << 9; +} + +static void +set_src_1(struct nv_pc *pc, struct nv_ref *ref) +{ + struct nv_reg *reg = SREG(ref); + + if (reg->file >= NV_FILE_MEM_C(0) && + reg->file <= NV_FILE_MEM_C(15)) { + assert(!(pc->emit[1] & 0x01800000)); + + pc->emit[0] |= 0x00800000; + pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22; + } else + if (reg->file != NV_FILE_GPR) + NOUVEAU_ERR("invalid src1 register file: %d\n", reg->file); + + assert(reg->id < 128); + pc->emit[0] |= reg->id << 16; +} + +static void +set_src_2(struct nv_pc *pc, 
struct nv_ref *ref) +{ + struct nv_reg *reg = SREG(ref); + + if (reg->file >= NV_FILE_MEM_C(0) && + reg->file <= NV_FILE_MEM_C(15)) { + assert(!(pc->emit[1] & 0x01800000)); + + pc->emit[0] |= 0x01000000; + pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22; + } else + if (reg->file != NV_FILE_GPR) + NOUVEAU_ERR("invalid src2 register file: %d\n", reg->file); + + assert(reg->id < 128); + pc->emit[1] |= reg->id << 14; +} + +/* the default form: + * - long instruction + * - 1 to 3 sources in slots 0, 1, 2 + * - address & flags + */ +static void +emit_form_MAD(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] |= 1; + + set_pred(pc, i); + set_pred_wr(pc, i); + + if (i->def[0]) + set_dst(pc, i->def[0]); + else { + pc->emit[0] |= 0x01fc; + pc->emit[1] |= 0x0008; + } + + if (i->src[0]) + set_src_0(pc, i->src[0]); + + if (i->src[1]) + set_src_1(pc, i->src[1]); + + if (i->src[2]) + set_src_2(pc, i->src[2]); + + set_addr(pc, i); +} + +/* like default form, but 2nd source in slot 2, no 3rd source */ +static void +emit_form_ADD(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] |= 1; + + if (i->def[0]) + set_dst(pc, i->def[0]); + else { + pc->emit[0] |= 0x01fc; + pc->emit[1] |= 0x0008; + } + + set_pred(pc, i); + set_pred_wr(pc, i); + + if (i->src[0]) + set_src_0(pc, i->src[0]); + + if (i->src[1]) + set_src_2(pc, i->src[1]); + + set_addr(pc, i); +} + +/* short mul */ +static void +emit_form_MUL(struct nv_pc *pc, struct nv_instruction *i) +{ + assert(!i->is_long && !(pc->emit[0] & 1)); + + assert(i->def[0]); + set_dst(pc, i->def[0]); + + if (i->src[0]) + set_src_0(pc, i->src[0]); + + if (i->src[1]) + set_src_1(pc, i->src[1]); +} + +/* default immediate form + * - 1 to 3 sources where last is immediate + * - no address or predicate possible + */ +static void +emit_form_IMM(struct nv_pc *pc, struct nv_instruction *i, ubyte mod_mask) +{ + pc->emit[0] |= 1; + + assert(i->def[0]); + assert(i->src[0]); + set_dst(pc, i->def[0]); + + assert(!i->src[4] && 
!i->flags_src && !i->flags_def); + + if (i->src[2]) { + set_immd(pc, i->src[2]); + set_src_0(pc, i->src[1]); + set_src_1(pc, i->src[0]); + } else + if (i->src[1]) { + set_immd(pc, i->src[1]); + set_src_0(pc, i->src[0]); + } else + set_immd(pc, i->src[0]); + + assert(!mod_mask); +} + +static void +set_ld_st_size(struct nv_pc *pc, ubyte type) +{ + switch (type) { + case NV_TYPE_F64: + pc->emit[1] |= 0x8000; + break; + case NV_TYPE_F32: + case NV_TYPE_S32: + case NV_TYPE_U32: + pc->emit[1] |= 0xc000; + break; + case NV_TYPE_S16: + pc->emit[1] |= 0x6000; + break; + case NV_TYPE_U16: + pc->emit[1] |= 0x4000; + break; + case NV_TYPE_S8: + pc->emit[1] |= 0x2000; + break; + default: + break; + } +} + +static void +emit_ld(struct nv_pc *pc, struct nv_instruction *i) +{ + ubyte sf = SFILE(i, 0); + + if (sf == NV_FILE_IMM) { + sf = NV_FILE_MEM_C(0); + nv_pc_alloc_immd(pc, i->src[0]); + + new_fixup(pc, NV_FIXUP_PARAM_RELOC, SREG(i->src[0])->id, 0xffff, 9); + } + + if (sf == NV_FILE_MEM_S || + sf == NV_FILE_MEM_P) { + pc->emit[0] = 0x10000001; + pc->emit[1] = 0x04200000 | (0x3c << 12); + if (sf == NV_FILE_MEM_P) + pc->emit[0] |= 0x01800000; + } else + if (sf >= NV_FILE_MEM_C(0) && + sf <= NV_FILE_MEM_C(15)) { + pc->emit[0] = 0x10000001; + pc->emit[1] = 0x24000000; + pc->emit[1] |= (sf - NV_FILE_MEM_C(0)) << 22; + } else + if (sf >= NV_FILE_MEM_G(0) && + sf <= NV_FILE_MEM_G(15)) { + pc->emit[0] = 0xd0000001 | ((sf - NV_FILE_MEM_G(0)) << 16); + pc->emit[1] = 0xa0000000; + + assert(i->src[4] && SREG(i->src[4])->file == NV_FILE_GPR); + SID(pc, i->src[4], 9); + } else + if (sf == NV_FILE_MEM_L) { + pc->emit[0] = 0xd0000001; + pc->emit[1] = 0x40000000; + } else { + NOUVEAU_ERR("invalid ld source file\n"); + abort(); + } + + set_ld_st_size(pc, STYPE(i, 0)); + + set_dst(pc, i->def[0]); + set_pred_wr(pc, i); + + set_pred(pc, i); + + if (sf < NV_FILE_MEM_G(0) || + sf > NV_FILE_MEM_G(15)) { + SID(pc, i->src[0], 9); + set_addr(pc, i); + } +} + +static void +emit_st(struct nv_pc *pc, struct 
nv_instruction *i) +{ + +} + +static int +verify_mov(struct nv_instruction *i) +{ + ubyte sf = SFILE(i, 0); + ubyte df = DFILE(i, 0); + + if (df == NV_FILE_GPR) + return 0; + + if (df != NV_FILE_OUT && + df != NV_FILE_FLAGS && + df != NV_FILE_ADDR) + return 1; + + if (sf == NV_FILE_FLAGS) + return 2; + if (sf == NV_FILE_ADDR) + return 3; + if (sf == NV_FILE_IMM && df != NV_FILE_OUT) + return 4; + + return 0; +} + +static void +emit_mov(struct nv_pc *pc, struct nv_instruction *i) +{ + assert(!verify_mov(i)); + + if (SFILE(i, 0) >= NV_FILE_MEM_S) + emit_ld(pc, i); + else + if (SFILE(i, 0) == NV_FILE_FLAGS) { + pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2); + pc->emit[1] = 0x20000780 | (SREG(i->src[0])->id << 12); + } else + if (SFILE(i, 0) == NV_FILE_ADDR) { + pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2); + pc->emit[1] = 0x40000780; + set_a16_bits(pc, SREG(i->src[0])->id); + } else + if (DFILE(i, 0) == NV_FILE_FLAGS) { + pc->emit[0] = 0x000001fd; + pc->emit[1] = 0xa0000788 | (1 << 6); + pc->emit[0] |= SREG(i->src[0])->id << 9; + pc->emit[1] |= DREG(i->def[0])->id << 4; + } else + if (SFILE(i, 0) == NV_FILE_IMM) { + if (i->opcode == NV_OP_LDA) + emit_ld(pc, i); + else { + pc->emit[0] = 0x10008001; + pc->emit[1] = 0x00000003; + + emit_form_IMM(pc, i, 0); + } + } else { + pc->emit[0] = 0x10000000; + pc->emit[0] |= DREG(i->def[0])->id << 2; + pc->emit[0] |= SREG(i->src[0])->id << 9; + + if (!i->is_long) + pc->emit[0] |= 0x8000; + else { + pc->emit[0] |= 0x00000001; + pc->emit[1] = 0x0403c000; + + set_pred(pc, i); + } + } + + if (DFILE(i, 0) == NV_FILE_OUT) + pc->emit[1] |= 0x8; +} + +static void +emit_interp(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x80000000; + + assert(DFILE(i, 0) == NV_FILE_GPR); + assert(SFILE(i, 0) == NV_FILE_MEM_V); + + DID(pc, i->def[0], 2); + SID(pc, i->src[0], 16); + + if (i->flat) + pc->emit[0] |= 1 << 8; + else + if (i->opcode == NV_OP_PINTERP) { + pc->emit[0] |= 1 << 25; + pc->emit[0] |= 
SREG(i->src[1])->id << 9; + } + + if (i->centroid) + pc->emit[0] |= 1 << 24; + + if (i->is_long) { + pc->emit[1] |= 0x0780 | + (pc->emit[0] & (3 << 24)) >> (24 - 16) | + (pc->emit[0] & (1 << 8)) >> (18 - 8); + + pc->emit[0] |= 1; + pc->emit[0] &= ~0x03000100; + } +} + +static void +emit_minmax(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x30000000; + pc->emit[1] = (i->opcode == NV_OP_MIN) ? (2 << 28) : 0; + + switch (DTYPE(i, 0)) { + case NV_TYPE_F32: + pc->emit[0] |= 0x80000000; + pc->emit[1] |= 0x80000000; + break; + case NV_TYPE_S32: + pc->emit[1] |= 0x8c000000; + break; + case NV_TYPE_U32: + pc->emit[1] |= 0x84000000; + break; + } + + emit_form_MAD(pc, i); + + if (i->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000; + if (i->src[1]->mod & NV_MOD_ABS) pc->emit[1] |= 0x00080000; +} + +static void +emit_add_f32(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0xb0000000; + + if (SFILE(i, 1) == NV_FILE_IMM) { + emit_form_IMM(pc, i, 0); + + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 0x8000; + if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22; + } else + if (i->is_long) { + emit_form_ADD(pc, i); + + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 26; + if (i->src[1]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 27; + } else { + emit_form_MUL(pc, i); + + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 0x8000; + if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22; + } +} + +static void +emit_add_b32(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x20008000; + + if (SFILE(i, 1) == NV_FILE_IMM) { + emit_form_IMM(pc, i, 0); + } else + if (i->is_long) { + pc->emit[0] = 0x20000000; + pc->emit[1] = 0x04000000; + emit_form_ADD(pc, i); + } else { + emit_form_MUL(pc, i); + } + + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 28; + if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22; +} + +static void +emit_add_a16(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0xd0000001 | 
(get_immd_u32(i->src[0]) << 9); + pc->emit[1] = 0x20000000; + + pc->emit[0] |= (DREG(i->def[0])->id + 1) << 2; + + set_pred(pc, i); + + if (i->src[1]) + set_a16_bits(pc, SREG(i->src[1])->id); +} + +static void +emit_flow(struct nv_pc *pc, struct nv_instruction *i, ubyte flow_op) +{ + pc->emit[0] = 0x00000003 | (flow_op << 28); + pc->emit[1] = 0x00000000; + + set_pred(pc, i); + + if (i->target) { + new_fixup(pc, NV_FIXUP_CFLOW_RELOC, i->target->bin_pos, 0x7ff800, 11); + pc->emit[0] |= (i->target->bin_pos / 4) << 11; + } +} + +static INLINE void +emit_add(struct nv_pc *pc, struct nv_instruction *i) +{ + if (DFILE(i, 0) == NV_FILE_ADDR) + emit_add_a16(pc, i); + else { + switch (DTYPE(i, 0)) { + case NV_TYPE_F32: + emit_add_f32(pc, i); + break; + case NV_TYPE_U32: + case NV_TYPE_S32: + emit_add_b32(pc, i); + break; + } + } +} + +static void +emit_bitop2(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0xd0000000; + + if (SFILE(i, 0) == NV_FILE_IMM) { + emit_form_IMM(pc, i, 0); + + if (i->opcode == NV_OP_OR) + pc->emit[0] |= 0x0100; + else + if (i->opcode == NV_OP_XOR) + pc->emit[0] |= 0x8000; + } else { + emit_form_MAD(pc, i); + + pc->emit[1] |= 0x04000000; + + if (i->opcode == NV_OP_OR) + pc->emit[1] |= 0x4000; + else + if (i->opcode == NV_OP_XOR) + pc->emit[1] |= 0x8000; + } +} + +static void +emit_shift(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x30000001; + pc->emit[1] = 0xc4000000; + + if (i->opcode == NV_OP_SHR) + pc->emit[1] |= 1 << 29; + + if (SFILE(i, 1) == NV_FILE_IMM) { + pc->emit[1] |= 1 << 20; + pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x7f) << 16; + + set_pred(pc, i); + } else + emit_form_MAD(pc, i); + + if (STYPE(i, 0) == NV_TYPE_S32) + pc->emit[1] |= 1 << 27; +} + +static void +emit_flop(struct nv_pc *pc, struct nv_instruction *i) +{ + struct nv_ref *src0 = i->src[0]; + + pc->emit[0] = 0x90000000; + + assert(SREG(src0)->type == NV_TYPE_F32); + assert(SREG(src0)->file == NV_FILE_GPR); + + if (!i->is_long) { + 
emit_form_MUL(pc, i); + assert(i->opcode == NV_OP_RCP && !src0->mod); + return; + } + + pc->emit[1] = (i->opcode - NV_OP_RCP) << 29; + + emit_form_MAD(pc, i); + + if (src0->mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000; + if (src0->mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000; +} + +static void +emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i) +{ + const boolean neg_mul = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG; + const boolean neg_add = (i->src[2]->mod & NV_MOD_NEG); + + pc->emit[0] = 0xe0000000; + + if (!i->is_long) { + emit_form_MUL(pc, i); + assert(!neg_mul && !neg_add); + return; + } + + emit_form_MAD(pc, i); + + if (neg_mul) pc->emit[1] |= 0x04000000; + if (neg_add) pc->emit[1] |= 0x08000000; + + if (i->saturate) + pc->emit[1] |= 0x20000000; +} + +static INLINE void +emit_mad(struct nv_pc *pc, struct nv_instruction *i) +{ + emit_mad_f32(pc, i); +} + +static void +emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i) +{ + boolean neg = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG; + + pc->emit[0] = 0xc0000000; + + if (SFILE(i, 1) == NV_FILE_IMM) { + emit_form_IMM(pc, i, 0); + + if (neg) + pc->emit[0] |= 0x8000; + } else + if (i->is_long) { + emit_form_MAD(pc, i); + + if (neg) + pc->emit[1] |= 0x08 << 24; + } else { + emit_form_MUL(pc, i); + + if (neg) + pc->emit[0] |= 0x8000; + } +} + +static void +emit_set(struct nv_pc *pc, struct nv_instruction *nvi) +{ + assert(nvi->is_long); + + pc->emit[0] = 0x30000000; + pc->emit[1] = 0x60000000; + + pc->emit[1] |= nvi->set_cond << 14; + + switch (STYPE(nvi, 0)) { + case NV_TYPE_U32: pc->emit[1] |= 0x04000000; break; + case NV_TYPE_S32: pc->emit[1] |= 0x0c000000; break; + case NV_TYPE_F32: pc->emit[0] |= 0x80000000; break; + default: + assert(0); + break; + } + + emit_form_MAD(pc, nvi); +} + +#define CVT_RN (0x00 << 16) +#define CVT_FLOOR (0x02 << 16) +#define CVT_CEIL (0x04 << 16) +#define CVT_TRUNC (0x06 << 16) +#define CVT_SAT (0x08 << 16) +#define CVT_ABS (0x10 << 16) + +#define CVT_X32_X32 
0x04004000 +#define CVT_X32_S32 0x04014000 +#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32) +#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32) +#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32) +#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32) +#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32) +#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32) +#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32) +#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32) +#define CVT_U32_U32 ((0x00 << 24) | CVT_X32_X32) + +#define CVT_NEG 0x20000000 +#define CVT_RI 0x08000000 + +static void +emit_cvt(struct nv_pc *pc, struct nv_instruction *nvi) +{ + ubyte dst_type = nvi->def[0] ? DTYPE(nvi, 0) : STYPE(nvi, 0); + + pc->emit[0] = 0xa0000000; + + switch (dst_type) { + case NV_TYPE_F32: + switch (STYPE(nvi, 0)) { + case NV_TYPE_F32: pc->emit[1] = CVT_F32_F32; break; + case NV_TYPE_S32: pc->emit[1] = CVT_F32_S32; break; + case NV_TYPE_U32: pc->emit[1] = CVT_F32_U32; break; + } + break; + case NV_TYPE_S32: + switch (STYPE(nvi, 0)) { + case NV_TYPE_F32: pc->emit[1] = CVT_S32_F32; break; + case NV_TYPE_S32: pc->emit[1] = CVT_S32_S32; break; + case NV_TYPE_U32: pc->emit[1] = CVT_S32_U32; break; + } + break; + case NV_TYPE_U32: + switch (STYPE(nvi, 0)) { + case NV_TYPE_F32: pc->emit[1] = CVT_U32_F32; break; + case NV_TYPE_S32: pc->emit[1] = CVT_U32_S32; break; + case NV_TYPE_U32: pc->emit[1] = CVT_U32_U32; break; + } + break; + } + if (pc->emit[1] == CVT_F32_F32 && + (nvi->opcode == NV_OP_CEIL || nvi->opcode == NV_OP_FLOOR || + nvi->opcode == NV_OP_TRUNC)) + pc->emit[1] |= CVT_RI; + + switch (nvi->opcode) { + case NV_OP_CEIL: pc->emit[1] |= CVT_CEIL; break; + case NV_OP_FLOOR: pc->emit[1] |= CVT_FLOOR; break; + case NV_OP_TRUNC: pc->emit[1] |= CVT_TRUNC; break; + + case NV_OP_ABS: pc->emit[1] |= CVT_ABS; break; + case NV_OP_SAT: pc->emit[1] |= CVT_SAT; break; + case NV_OP_NEG: pc->emit[1] |= CVT_NEG; break; + default: + assert(nvi->opcode == NV_OP_CVT); + break; + } + assert(nvi->opcode != NV_OP_ABS || 
!(nvi->src[0]->mod & NV_MOD_NEG)); + + if (nvi->src[0]->mod & NV_MOD_NEG) pc->emit[1] ^= CVT_NEG; + if (nvi->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= CVT_ABS; + + emit_form_MAD(pc, nvi); +} + +static void +emit_tex(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0xf0000001; + pc->emit[1] = 0x00000000; + + DID(pc, i->def[0], 2); + + set_pred(pc, i); + + pc->emit[0] |= i->tex_t << 9; + pc->emit[0] |= i->tex_s << 17; + + pc->emit[0] |= i->tex_argc << 22; + + pc->emit[0] |= (i->tex_mask & 0x3) << 25; + pc->emit[1] |= (i->tex_mask & 0xc) << 12; + + if (i->tex_live) + pc->emit[1] |= 4; + + if (i->tex_cube) + pc->emit[0] |= 0x08000000; + + if (i->opcode == NV_OP_TXB) + pc->emit[1] |= 0x20000000; + else + if (i->opcode == NV_OP_TXL) + pc->emit[1] |= 0x40000000; + else + pc->emit[0] -= 1 << 22; +} + +static void +emit_cvt2fixed(struct nv_pc *pc, struct nv_instruction *i) +{ + ubyte mod = i->src[0]->mod; + + pc->emit[0] = 0xb0000000; + pc->emit[1] = 0xc0000000; + + if (i->opcode == NV_OP_PREEX2) + pc->emit[1] |= 0x4000; + + emit_form_MAD(pc, i); + + if (mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000; + if (mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000; +} + +static void +emit_ddx(struct nv_pc *pc, struct nv_instruction *i) +{ + assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR); + + pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0240001 : 0xc0140001; + pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 0x86400000 : 0x89800000; + + DID(pc, i->def[0], 2); + SID(pc, i->src[0], 9); + SID(pc, i->src[0], 32 + 14); + + set_pred(pc, i); + set_pred_wr(pc, i); +} + +static void +emit_ddy(struct nv_pc *pc, struct nv_instruction *i) +{ + assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR); + + pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0250001 : 0xc0150001; + pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 
0x85800000 : 0x8a400000; + + DID(pc, i->def[0], 2); + SID(pc, i->src[0], 9); + SID(pc, i->src[0], 32 + 14); + + set_pred(pc, i); + set_pred_wr(pc, i); +} + +void +nv50_emit_instruction(struct nv_pc *pc, struct nv_instruction *i) +{ + // nv_print_instruction(i); + + switch (i->opcode) { + case NV_OP_MOV: + if (DFILE(i, 0) == NV_FILE_ADDR) + emit_add_a16(pc, i); + else + emit_mov(pc, i); + break; + case NV_OP_LDA: + emit_mov(pc, i); + break; + case NV_OP_STA: + emit_st(pc, i); + break; + case NV_OP_LINTERP: + case NV_OP_PINTERP: + emit_interp(pc, i); + break; + case NV_OP_ADD: + emit_add(pc, i); + break; + case NV_OP_AND: + case NV_OP_OR: + case NV_OP_XOR: + emit_bitop2(pc, i); + break; + case NV_OP_CVT: + case NV_OP_ABS: + case NV_OP_NEG: + case NV_OP_SAT: + case NV_OP_CEIL: + case NV_OP_FLOOR: + case NV_OP_TRUNC: + emit_cvt(pc, i); + break; + case NV_OP_DFDX: + emit_ddx(pc, i); + break; + case NV_OP_DFDY: + emit_ddy(pc, i); + break; + case NV_OP_RCP: + case NV_OP_RSQ: + case NV_OP_LG2: + case NV_OP_SIN: + case NV_OP_COS: + case NV_OP_EX2: + emit_flop(pc, i); + break; + case NV_OP_PRESIN: + case NV_OP_PREEX2: + emit_cvt2fixed(pc, i); + break; + case NV_OP_MAD: + emit_mad(pc, i); + break; + case NV_OP_MAX: + case NV_OP_MIN: + emit_minmax(pc, i); + break; + case NV_OP_MUL: + emit_mul_f32(pc, i); + break; + case NV_OP_SET: + emit_set(pc, i); + break; + case NV_OP_SHL: + case NV_OP_SHR: + emit_shift(pc, i); + break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + emit_tex(pc, i); + break; + case NV_OP_KIL: + emit_flow(pc, i, 0x0); + break; + case NV_OP_BRA: + emit_flow(pc, i, 0x1); + break; + case NV_OP_CALL: + emit_flow(pc, i, 0x2); + break; + case NV_OP_RET: + emit_flow(pc, i, 0x3); + break; + case NV_OP_BREAKADDR: + emit_flow(pc, i, 0x4); + break; + case NV_OP_BREAK: + emit_flow(pc, i, 0x5); + break; + case NV_OP_JOINAT: + emit_flow(pc, i, 0xa); + break; + case NV_OP_NOP: + pc->emit[0] = 0xf0000001; + pc->emit[1] = 0xe0000000; + break; + case NV_OP_PHI: + 
case NV_OP_SUB: + NOUVEAU_ERR("operation \"%s\" should have been eliminated\n", + nv_opcode_name(i->opcode)); + break; + default: + NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode); + abort(); + break; + } + + assert((pc->emit[0] & 1) == i->is_long); +} -- cgit v1.2.3