/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "nv50_context.h"
#include "nv50_pc.h"

/* Bit layout of the condition code / flags register fields in pc->emit[1].
 * The values match the literals used by set_pred() (condition code at bit 7,
 * flags source register id at bit 12) and set_pred_wr() (flags destination
 * register id at bit 4, write enable at bit 6).
 */
#define FLAGS_CC_SHIFT    7
#define FLAGS_ID_SHIFT    12
#define FLAGS_WR_ID_SHIFT 4
#define FLAGS_CC_MASK     (0x1f << FLAGS_CC_SHIFT)
#define FLAGS_ID_MASK     (0x03 << FLAGS_ID_SHIFT)
#define FLAGS_WR_EN       (1 << 6)
#define FLAGS_WR_ID_MASK  (0x3 << FLAGS_WR_ID_SHIFT)

/* Fixup types handled by nv50_relocate_program(): the fixup's data is
 * relative to the code base or to the data base address, respectively.
 */
#define NV50_FIXUP_CODE_RELOC 0
#define NV50_FIXUP_DATA_RELOC 1

/* One deferred patch of an emitted code word; recorded by new_fixup() and
 * applied by nv50_relocate_program().
 */
struct nv50_fixup {
   uint8_t type;    /* NV50_FIXUP_*_RELOC: selects the base address added to data */
   int8_t shift;    /* left shift applied to the value; negative means right shift */
   uint32_t mask;   /* bits of the code word that get replaced */
   uint32_t data;   /* value before the base address is added */
   uint32_t offset; /* byte offset of the patched word within the code */
};

void
nv50_relocate_program(struct nv50_program *p,
                      uint32_t code_base,
                      uint32_t data_base)
{
   struct nv50_fixup *f = (struct nv50_fixup *)p->fixups;
   unsigned i;

   for (i = 0; i < p->num_fixups; ++i) {
      uint32_t data;

      switch (f[i].type) {
      case NV50_FIXUP_CODE_RELOC: data = code_base + f[i].data; break;
      case NV50_FIXUP_DATA_RELOC: data = data_base + f[i].data; break;
      default:
         data = f[i].data;
         break;
      }
      data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift);

      p->code[f[i].offset / 4] &= ~f[i].mask;
      p->code[f[i].offset / 4] |= data & f[i].mask;
   }
}

/* Record a fixup for the code word w words past the current emit position.
 *
 * ty:   NV50_FIXUP_*_RELOC
 * data: value the base address is added to at relocation time
 * m:    mask of the bits to patch
 * s:    shift applied to the relocated value (negative = right shift)
 *
 * The fixup array grows in steps of 8 entries.
 */
static void
new_fixup(struct nv_pc *pc, uint8_t ty, int w, uint32_t data, uint32_t m, int s)
{
   const unsigned entry_size = sizeof(struct nv50_fixup);
   const unsigned idx = pc->num_fixups;
   struct nv50_fixup *entry;

   if ((idx % 8) == 0)
      pc->fixups = REALLOC(pc->fixups, idx * entry_size, (idx + 8) * entry_size);

   entry = &((struct nv50_fixup *)pc->fixups)[idx];

   entry->type = ty;
   entry->shift = s;
   entry->mask = m;
   entry->data = data;
   entry->offset = (pc->bin_pos + w) * 4; /* word position -> byte offset */

   pc->num_fixups++;
}

/* Minimum encoding size in bytes (0, 4 or 8) per opcode, indexed by the
 * instruction's opcode. nv50_inst_min_size() may still require 8 bytes for
 * an opcode listed with 4 here, depending on its operands and modifiers.
 * NOTE(review): entries of 0 presumably belong to opcodes that emit no
 * code -- confirm against the opcode list.
 */
const ubyte nv50_inst_min_size_tab[NV_OP_COUNT] =
{
   0, 0, 0, 8, 8, 4, 4, 4, 8, 4, 4, 8, 8, 8, 8, 8, /* 15 */
   8, 8, 8, 4, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 31 */
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 47 */
   4, 8, 8, 8, 8, 8, 0, 0, 8
};

/* Return the smallest encoding size in bytes able to express instruction i.
 *
 * Opcodes whose table entry exceeds 4 always need the 8 byte (long) form.
 * For the others, 8 is returned as soon as one operand, flag, address or
 * modifier is found that the checks below reject for the short form;
 * otherwise the table entry itself is returned.
 */
unsigned
nv50_inst_min_size(struct nv_instruction *i)
{
   int n;

   if (nv50_inst_min_size_tab[i->opcode] > 4)
      return 8;

   /* No destination: emit_form_MUL() (the short form) requires def[0],
    * only the long forms encode "no destination". Deciding this first also
    * keeps the def[0] dereferences below from touching a NULL pointer.
    */
   if (!i->def[0])
      return 8;

   if (i->def[0]->reg.file != NV_FILE_GPR)
      return 8;
   if (i->def[0]->join->reg.id > 63)
      return 8;

   for (n = 0; n < 3; ++n) {
      if (!i->src[n])
         break;
      if (i->src[n]->value->reg.file != NV_FILE_GPR &&
          i->src[n]->value->reg.file != NV_FILE_MEM_V)
         return 8;
      if (i->src[n]->value->reg.id > 63)
         return 8;
   }

   /* flags access and address register use require the long form */
   if (i->flags_def || i->flags_src || i->src[4])
      return 8;

   if (i->is_join)
      return 8;

   if (i->src[2]) {
      /* three-source case: no saturate, no modifier on src2, identical
       * modifiers without ABS on src0/src1, and dst must share its register
       * with src2
       */
      if (i->saturate || i->src[2]->mod)
         return 8;
      if (i->src[0]->mod ^ i->src[1]->mod)
         return 8;
      if ((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS)
         return 8;
      if (i->def[0]->join->reg.id < 0 ||
          i->def[0]->join->reg.id != i->src[2]->value->join->reg.id)
         return 8;
   }

   return nv50_inst_min_size_tab[i->opcode];
}

/* Type that source operand s of nvi is cast to (the reference's typecast). */
static INLINE ubyte
STYPE(struct nv_instruction *nvi, int s)
{
   struct nv_ref *src = nvi->src[s];

   return src->typecast;
}

/* Register type of destination d of nvi. */
static INLINE ubyte
DTYPE(struct nv_instruction *nvi, int d)
{
   struct nv_value *dst = nvi->def[d];

   return dst->reg.type;
}

/* Register description of the value a source reference resolves to,
 * taken from the value's join.
 */
static INLINE struct nv_reg *
SREG(struct nv_ref *ref)
{
   struct nv_reg *reg = &ref->value->join->reg;

   return reg;
}

/* Register description of a destination value, taken from its join. */
static INLINE struct nv_reg *
DREG(struct nv_value *val)
{
   struct nv_reg *reg = &val->join->reg;

   return reg;
}

/* Register file of source operand s of nvi (read from the value itself,
 * not from its join).
 */
static INLINE ubyte
SFILE(struct nv_instruction *nvi, int s)
{
   struct nv_ref *src = nvi->src[s];

   return src->value->reg.file;
}

/* Register file of destination d of nvi.
 *
 * The index d is honoured here; previously def[0] was read regardless of d.
 * All callers in this file pass 0, so their behaviour is unchanged.
 */
static INLINE ubyte
DFILE(struct nv_instruction *nvi, int d)
{
   return nvi->def[d]->reg.file;
}

/* OR the register id of source ref into the instruction at bit position
 * pos, where positions 32 and up address the second word.
 */
static INLINE void
SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
{
   const int word = pos / 32;
   const int bit = pos % 32;

   pc->emit[word] |= SREG(ref)->id << bit;
}

/* OR the register id of destination val into the instruction at bit
 * position pos, where positions 32 and up address the second word.
 */
static INLINE void
DID(struct nv_pc *pc, struct nv_value *val, int pos)
{
   const int word = pos / 32;
   const int bit = pos % 32;

   pc->emit[word] |= DREG(val)->id << bit;
}

/* Return the raw 32 bit pattern of an immediate source.
 * The referenced value must live in NV_FILE_IMM (asserted).
 */
static INLINE uint32_t
get_immd_u32(struct nv_ref *ref)
{
   uint32_t bits;

   assert(ref->value->reg.file == NV_FILE_IMM);

   bits = ref->value->reg.imm.u32;
   return bits;
}

/* Encode a 32 bit immediate: the low 6 bits go to word 0 at bit 16, the
 * remaining 26 bits to word 1 at bit 2; the two low bits of word 1 are set.
 */
static INLINE void
set_immd_u32(struct nv_pc *pc, uint32_t u32)
{
   const uint32_t lo6 = u32 & 0x3f;
   const uint32_t hi26 = u32 >> 6;

   pc->emit[0] |= lo6 << 16;
   pc->emit[1] |= (hi26 << 2) | 3;
}

/* Encode the immediate referenced by ref into the current instruction.
 * ref must refer to a value in NV_FILE_IMM (asserted).
 */
static INLINE void
set_immd(struct nv_pc *pc, struct nv_ref *ref)
{
   uint32_t bits;

   assert(ref->value->reg.file == NV_FILE_IMM);

   bits = get_immd_u32(ref);
   set_immd_u32(pc, bits);
}

/* Allocate data in the immediate buffer, for the case that we want to load
 * an immediate from a constant buffer instead of inlining it into the code.
 *
 * An existing entry with the same bit pattern is reused; otherwise the value
 * is appended (the buffer grows in steps of 8 entries). The index of the
 * entry is stored as the register id of the referenced value.
 */
static void
nv_pc_alloc_immd(struct nv_pc *pc, struct nv_ref *ref)
{
   const uint32_t val = get_immd_u32(ref);
   uint32_t slot = 0;

   while (slot < pc->immd_count && pc->immd_buf[slot] != val)
      ++slot;

   if (slot == pc->immd_count) {
      /* not found: append, growing the buffer when it is full */
      if ((pc->immd_count % 8) == 0)
         pc->immd_buf = REALLOC(pc->immd_buf,
                                pc->immd_count * 4, (pc->immd_count + 8) * 4);
      pc->immd_buf[pc->immd_count++] = val;
   }

   SREG(ref)->id = slot;
}

static INLINE void
set_pred(struct nv_pc *pc, struct nv_instruction *i)
{
   assert(!(pc->emit[1] & 0x00003f80));

   pc->emit[1] |= i->cc << 7;
   if (i->flags_src)
      pc->emit[1] |= SREG(i->flags_src)->id << 12;
}

static INLINE void
set_pred_wr(struct nv_pc *pc, struct nv_instruction *i)
{
   assert(!(pc->emit[1] & 0x00000070));

   if (i->flags_def)
      pc->emit[1] |= (DREG(i->flags_def)->id << 4) | 0x40;
}

/* Encode address register id + 1 ($a0 is always 0, so ids are biased by
 * one): the two low bits go to word 0 at bit 26, bit 2 goes to word 1.
 */
static INLINE void
set_a16_bits(struct nv_pc *pc, uint id)
{
   const uint areg = id + 1;

   pc->emit[0] |= (areg & 3) << 26;
   pc->emit[1] |= areg & 4;
}

/* Encode the address register of i, which is held in src[4], if any. */
static INLINE void
set_addr(struct nv_pc *pc, struct nv_instruction *i)
{
   if (!i->src[4])
      return;

   set_a16_bits(pc, SREG(i->src[4])->id);
}

/* Encode the destination register of value at bit 2 of word 0.
 *
 * A register without assigned id (id < 0) is encoded as id 127 with bit 3
 * of word 1 set. Destinations in NV_FILE_OUT set bit 3 of word 1 as well;
 * NV_FILE_ADDR destinations are not supported here (asserted).
 */
static void
set_dst(struct nv_pc *pc, struct nv_value *value)
{
   struct nv_reg *reg = DREG(value);

   if (reg->id < 0) {
      pc->emit[0] |= (127 << 2) | 1; /* set 'long'-bit to catch bugs */
      pc->emit[1] |= 0x8;
      return;
   }

   if (reg->file == NV_FILE_OUT) {
      pc->emit[1] |= 0x8;
   } else {
      assert(reg->file != NV_FILE_ADDR);
   }

   pc->emit[0] |= reg->id << 2;
}

/* Encode a source operand in slot 0 (id at bit 9 of word 0).
 * Accepted files are GPR, MEM_S and MEM_P; the latter two set additional
 * selector bits. Any other file is reported but still encoded.
 */
static void
set_src_0(struct nv_pc *pc, struct nv_ref *ref)
{
   struct nv_reg *reg = SREG(ref);

   if (reg->file == NV_FILE_MEM_S) {
      pc->emit[1] |= 0x00200000;
   } else if (reg->file == NV_FILE_MEM_P) {
      pc->emit[0] |= 0x01800000;
   } else if (reg->file != NV_FILE_GPR) {
      NOUVEAU_ERR("invalid src0 register file: %d\n", reg->file);
   }

   assert(reg->id < 128);
   pc->emit[0] |= reg->id << 9;
}

/* Encode a source operand in slot 1 (id at bit 16 of word 0).
 * Accepted files are GPR and the constant buffers c0..c15; for the latter
 * the buffer index is stored at bit 22 of word 1, which must not hold a
 * buffer index yet (asserted). Any other file is reported but still encoded.
 */
static void
set_src_1(struct nv_pc *pc, struct nv_ref *ref)
{
   struct nv_reg *reg = SREG(ref);
   const int is_const = reg->file >= NV_FILE_MEM_C(0) &&
                        reg->file <= NV_FILE_MEM_C(15);

   if (is_const) {
      assert(!(pc->emit[1] & 0x01800000));

      pc->emit[0] |= 0x00800000;
      pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22;
   } else if (reg->file != NV_FILE_GPR) {
      NOUVEAU_ERR("invalid src1 register file: %d\n", reg->file);
   }

   assert(reg->id < 128);
   pc->emit[0] |= reg->id << 16;
}

/* Encode a source operand in slot 2 (id at bit 14 of word 1).
 * Accepted files are GPR and the constant buffers c0..c15; for the latter
 * the buffer index is stored at bit 22 of word 1, which must not hold a
 * buffer index yet (asserted). Any other file is reported but still encoded.
 */
static void
set_src_2(struct nv_pc *pc, struct nv_ref *ref)
{
   struct nv_reg *reg = SREG(ref);
   const int is_const = reg->file >= NV_FILE_MEM_C(0) &&
                        reg->file <= NV_FILE_MEM_C(15);

   if (is_const) {
      assert(!(pc->emit[1] & 0x01800000));

      pc->emit[0] |= 0x01000000;
      pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22;
   } else if (reg->file != NV_FILE_GPR) {
      NOUVEAU_ERR("invalid src2 register file: %d\n", reg->file);
   }

   assert(reg->id < 128);
   pc->emit[1] |= reg->id << 14;
}

/* The default form:
 * - long instruction
 * - 1 to 3 sources in slots 0, 1, 2
 * - address & flags
 * A missing destination is encoded like an unassigned one in set_dst():
 * register field 127 plus bit 3 of word 1.
 */
static void
emit_form_MAD(struct nv_pc *pc, struct nv_instruction *i)
{
   pc->emit[0] |= 1; /* long */

   if (i->def[0]) {
      set_dst(pc, i->def[0]);
   } else {
      pc->emit[0] |= 0x01fc;
      pc->emit[1] |= 0x0008;
   }

   set_pred(pc, i);
   set_pred_wr(pc, i);

   if (i->src[0])
      set_src_0(pc, i->src[0]);
   if (i->src[1])
      set_src_1(pc, i->src[1]);
   if (i->src[2])
      set_src_2(pc, i->src[2]);

   set_addr(pc, i);
}

/* Like the default form, but the 2nd source goes into slot 2 and there is
 * no 3rd source.
 */
static void
emit_form_ADD(struct nv_pc *pc, struct nv_instruction *i)
{
   pc->emit[0] |= 1; /* long */

   set_pred(pc, i);
   set_pred_wr(pc, i);

   if (i->def[0]) {
      set_dst(pc, i->def[0]);
   } else {
      /* no destination: register field 127 plus bit 3 of word 1 */
      pc->emit[0] |= 0x01fc;
      pc->emit[1] |= 0x0008;
   }

   if (i->src[0])
      set_src_0(pc, i->src[0]);
   if (i->src[1])
      set_src_2(pc, i->src[1]);

   set_addr(pc, i);
}

/* Short form ("short mul"): destination plus up to two sources in slots 0
 * and 1. The instruction must not be long and must have a destination
 * (both asserted).
 */
static void
emit_form_MUL(struct nv_pc *pc, struct nv_instruction *i)
{
   struct nv_ref *src0 = i->src[0];
   struct nv_ref *src1 = i->src[1];

   assert(!i->is_long && !(pc->emit[0] & 1));

   assert(i->def[0]);
   set_dst(pc, i->def[0]);

   if (src0)
      set_src_0(pc, src0);
   if (src1)
      set_src_1(pc, src1);
}

/* Default immediate form:
 * - 1 to 3 sources where the last one is the immediate
 * - no address or predicate possible (asserted)
 * With three sources, src[1] is put in slot 0 and src[0] in slot 1.
 * mod_mask has to be 0 (asserted).
 */
static void
emit_form_IMM(struct nv_pc *pc, struct nv_instruction *i, ubyte mod_mask)
{
   pc->emit[0] |= 1; /* long */

   assert(i->def[0]);
   assert(i->src[0]);
   set_dst(pc, i->def[0]);

   assert(!i->src[4] && !i->flags_src && !i->flags_def);

   if (i->src[2]) {
      set_immd(pc, i->src[2]);
      set_src_0(pc, i->src[1]);
      set_src_1(pc, i->src[0]);
   } else if (i->src[1]) {
      set_immd(pc, i->src[1]);
      set_src_0(pc, i->src[0]);
   } else {
      set_immd(pc, i->src[0]);
   }

   assert(!mod_mask);
}

/* OR the access size / type selector for a load or store into word 1.
 * The per-type pattern is shifted left by s; types without a pattern leave
 * the word untouched.
 */
static void
set_ld_st_size(struct nv_pc *pc, int s, ubyte type)
{
   uint32_t pattern;

   switch (type) {
   case NV_TYPE_F64:
      pattern = 0x8000;
      break;
   case NV_TYPE_F32:
   case NV_TYPE_S32:
   case NV_TYPE_U32:
      pattern = 0xc000;
      break;
   case NV_TYPE_S16:
      pattern = 0x6000;
      break;
   case NV_TYPE_U16:
      pattern = 0x4000;
      break;
   case NV_TYPE_S8:
      pattern = 0x2000;
      break;
   default:
      return;
   }

   pc->emit[1] |= pattern << s;
}

/* Emit a load of src[0] into def[0].
 *
 * The opcode words depend on the source file (MEM_S/MEM_P, constant buffer,
 * global or local memory). An immediate source is first placed in the
 * immediate buffer and then loaded like a c0[] access, with a data fixup
 * recorded for the current word. Any other file is an error and aborts.
 */
static void
emit_ld(struct nv_pc *pc, struct nv_instruction *i)
{
   ubyte sf = SFILE(i, 0);
   int is_global;

   if (sf == NV_FILE_IMM) {
      nv_pc_alloc_immd(pc, i->src[0]);
      new_fixup(pc, NV50_FIXUP_DATA_RELOC, 0, SREG(i->src[0])->id, 0xffff, 9);

      sf = NV_FILE_MEM_C(0);
   }

   is_global = sf >= NV_FILE_MEM_G(0) && sf <= NV_FILE_MEM_G(15);

   if (sf == NV_FILE_MEM_S || sf == NV_FILE_MEM_P) {
      pc->emit[0] = 0x10000001;
      pc->emit[1] = 0x04200000 | (0x3c << 12);
      if (sf == NV_FILE_MEM_P)
         pc->emit[0] |= 0x01800000;
   } else if (sf >= NV_FILE_MEM_C(0) && sf <= NV_FILE_MEM_C(15)) {
      pc->emit[0] = 0x10000001;
      pc->emit[1] = 0x24000000;
      pc->emit[1] |= (sf - NV_FILE_MEM_C(0)) << 22; /* buffer index */
   } else if (is_global) {
      pc->emit[0] = 0xd0000001 | ((sf - NV_FILE_MEM_G(0)) << 16);
      pc->emit[1] = 0xa0000000;

      /* global loads take their address from a GPR held in src[4] */
      assert(i->src[4] && SREG(i->src[4])->file == NV_FILE_GPR);
      SID(pc, i->src[4], 9);
   } else if (sf == NV_FILE_MEM_L) {
      pc->emit[0] = 0xd0000001;
      pc->emit[1] = 0x40000000;

      set_addr(pc, i);
   } else {
      NOUVEAU_ERR("invalid ld source file\n");
      abort();
   }

   set_ld_st_size(pc, (sf == NV_FILE_MEM_L) ? 8 : 0, STYPE(i, 0));

   set_dst(pc, i->def[0]);
   set_pred_wr(pc, i);

   set_pred(pc, i);

   if (!is_global) {
      SID(pc, i->src[0], 9);
      set_addr(pc, i);
   }
}

/* Emit a store of GPR src[1] to local memory location src[0]
 * (both files asserted).
 */
static void
emit_st(struct nv_pc *pc, struct nv_instruction *i)
{
   struct nv_ref *dst_mem = i->src[0];
   struct nv_ref *data = i->src[1];

   assert(SFILE(i, 1) == NV_FILE_GPR);
   assert(SFILE(i, 0) == NV_FILE_MEM_L);

   pc->emit[0] = 0xd0000001;
   pc->emit[1] = 0x60000000;

   SID(pc, data, 2);
   SID(pc, dst_mem, 9);

   set_ld_st_size(pc, 8, STYPE(i, 1));

   set_addr(pc, i);
   set_pred(pc, i);
}

/* Check whether the source/destination file combination of a move can be
 * emitted. Returns 0 if it can, otherwise a non-zero code naming the
 * failed check:
 *  1: destination is neither GPR, OUT, FLAGS nor ADDR
 *  2: FLAGS source with a non-GPR destination
 *  3: ADDR source with a non-GPR destination
 *  4: immediate source with a FLAGS or ADDR destination
 */
static int
verify_mov(struct nv_instruction *i)
{
   const ubyte src_file = SFILE(i, 0);
   const ubyte dst_file = DFILE(i, 0);

   if (dst_file == NV_FILE_GPR)
      return 0;

   if (!(dst_file == NV_FILE_OUT ||
         dst_file == NV_FILE_FLAGS ||
         dst_file == NV_FILE_ADDR))
      return 1;

   if (src_file == NV_FILE_FLAGS)
      return 2;
   if (src_file == NV_FILE_ADDR)
      return 3;

   return (src_file == NV_FILE_IMM && dst_file != NV_FILE_OUT) ? 4 : 0;
}

static void
emit_mov(struct nv_pc *pc, struct nv_instruction *i)
{
   assert(!verify_mov(i));

   if (SFILE(i, 0) >= NV_FILE_MEM_S)
      emit_ld(pc, i);
   else
   if (SFILE(i, 0) == NV_FILE_FLAGS) {
      pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2);
      pc->emit[1] = 0x20000780 | (SREG(i->src[0])->id << 12);
   } else
   if (SFILE(i, 0) == NV_FILE_ADDR) {
      pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2);
      pc->emit[1] = 0x40000780;
      set_a16_bits(pc, SREG(i->src[0])->id);
   } else
   if (DFILE(i, 0) == NV_FILE_FLAGS) {
      pc->emit[0] = 0x00000001;
      pc->emit[1] = 0xa0000000 | (1 << 6);
      set_pred(pc, i);
      pc->emit[0] |= SREG(i->src[0])->id << 9;
      pc->emit[1] |= DREG(i->def[0])->id << 4;
   } else
   if (SFILE(i, 0) == NV_FILE_IMM) {
      if (i->opcode == NV_OP_LDA) {
         emit_ld(pc, i);
      } else {
         pc->emit[0] = 0x10008001;
         pc->emit[1] = 0x00000003;

         emit_form_IMM(pc, i, 0);
      }
   } else {
      pc->emit[0] = 0x10000000;
      pc->emit[0] |= DREG(i->def[0])->id << 2;
      pc->emit[0] |= SREG(i->src[0])->id << 9;

      if (!i->is_long) {
         pc->emit[0] |= 0x8000;
      } else {
         pc->emit[0] |= 0x00000001;
         pc->emit[1] = 0x0403c000;

         set_pred(pc, i);
      }
   }

   if (DFILE(i, 0) == NV_FILE_OUT)
      pc->emit[1] |= 0x8;
}

/* Emit an interpolation of varying src[0] (NV_FILE_MEM_V) into GPR def[0].
 *
 * The mode bits are first set up as in the short encoding (word 0: bit 8
 * flat, bit 24 centroid, bit 25 plus src[1] at bit 9 for NV_OP_PINTERP).
 * For the long encoding they are then moved into word 1 and cleared from
 * word 0.
 */
static void
emit_interp(struct nv_pc *pc, struct nv_instruction *i)
{
   pc->emit[0] = 0x80000000;

   assert(DFILE(i, 0) == NV_FILE_GPR);
   assert(SFILE(i, 0) == NV_FILE_MEM_V);

   DID(pc, i->def[0], 2);
   SID(pc, i->src[0], 16);

   if (i->flat)
      pc->emit[0] |= 1 << 8;
   else
   if (i->opcode == NV_OP_PINTERP) {
      pc->emit[0] |= 1 << 25;
      pc->emit[0] |= SREG(i->src[1])->id << 9;
   }

   if (i->centroid)
      pc->emit[0] |= 1 << 24;

   /* a flags source can only be encoded in the long form */
   assert(i->is_long || !i->flags_src);

   if (i->is_long) {
      set_pred(pc, i);

      /* Move bits 24..25 of word 0 to bits 16..17 of word 1, and bit 8
       * (flat) to bit 18. The latter has to be a LEFT shift: shifting bit 8
       * right by 10 always yields 0 and silently dropped the flat bit.
       */
      pc->emit[1] |=
         (pc->emit[0] & (3 << 24)) >> (24 - 16) |
         (pc->emit[0] & (1 <<  8)) << (18 -  8);

      pc->emit[0] |= 1;
      pc->emit[0] &= ~0x03000100;
   }
}

/* Emit MIN or MAX in the default long form. The destination type selects
 * the F32/S32/U32 variant; ABS modifiers on src[0] and src[1] are encoded
 * in word 1.
 */
static void
emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
{
   const int is_min = (i->opcode == NV_OP_MIN);

   pc->emit[0] = 0x30000000;
   pc->emit[1] = is_min ? (2 << 28) : 0;

   switch (DTYPE(i, 0)) {
   case NV_TYPE_F32:
      pc->emit[0] |= 0x80000000;
      pc->emit[1] |= 0x80000000;
      break;
   case NV_TYPE_S32:
      pc->emit[1] |= 0x8c000000;
      break;
   case NV_TYPE_U32:
      pc->emit[1] |= 0x84000000;
      break;
   }

   emit_form_MAD(pc, i);

   if (i->src[0]->mod & NV_MOD_ABS)
      pc->emit[1] |= 0x00100000;
   if (i->src[1]->mod & NV_MOD_ABS)
      pc->emit[1] |= 0x00080000;
}

/* Emit a 32 bit float add in immediate, long or short form.
 * NEG modifiers are encoded per source (word 0 for the immediate and short
 * forms, word 1 for the long form); ABS modifiers are not allowed
 * (asserted). Saturation is only encoded in the long form.
 */
static void
emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
{
   const int neg0 = (i->src[0]->mod & NV_MOD_NEG) != 0;
   const int neg1 = (i->src[1]->mod & NV_MOD_NEG) != 0;

   pc->emit[0] = 0xb0000000;

   assert(!((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS));

   if (SFILE(i, 1) == NV_FILE_IMM) {
      emit_form_IMM(pc, i, 0);

      if (neg0)
         pc->emit[0] |= 0x8000;
      if (neg1)
         pc->emit[0] |= 1 << 22;
   } else if (i->is_long) {
      emit_form_ADD(pc, i);

      if (neg0)
         pc->emit[1] |= 1 << 26;
      if (neg1)
         pc->emit[1] |= 1 << 27;

      if (i->saturate)
         pc->emit[1] |= 0x20000000;
   } else {
      emit_form_MUL(pc, i);

      if (neg0)
         pc->emit[0] |= 0x8000;
      if (neg1)
         pc->emit[0] |= 1 << 22;
   }
}

/* Emit a 32 bit integer add in immediate, long or short form.
 * NEG modifiers on src[0] / src[1] set bits 28 / 22 of word 0 in all forms.
 */
static void
emit_add_b32(struct nv_pc *pc, struct nv_instruction *i)
{
   if (SFILE(i, 1) == NV_FILE_IMM) {
      pc->emit[0] = 0x20008000;
      emit_form_IMM(pc, i, 0);
   } else if (i->is_long) {
      pc->emit[0] = 0x20000000;
      pc->emit[1] = 0x04000000;
      emit_form_ADD(pc, i);
   } else {
      pc->emit[0] = 0x20008000;
      emit_form_MUL(pc, i);
   }

   if (i->src[0]->mod & NV_MOD_NEG)
      pc->emit[0] |= 1 << 28;
   if (i->src[1]->mod & NV_MOD_NEG)
      pc->emit[0] |= 1 << 22;
}

/* Emit a write to an address register: a move of an immediate
 * (NV_OP_MOV, immediate in src[0]) or an add of an immediate (immediate in
 * src[1]). The low 16 bits of the immediate go to bit 9 of word 0, the
 * destination id + 1 to bit 2.
 *
 * NOTE(review): if src[1] exists, the address register bits are taken from
 * src[1] -- the same operand the immediate is read from -- and 1 is added
 * on top of the increment set_a16_bits() already applies. Confirm that this
 * is intended and should not be src[0] / the plain id.
 */
static void
emit_add_a16(struct nv_pc *pc, struct nv_instruction *i)
{
   const int imm_slot = (i->opcode == NV_OP_MOV) ? 0 : 1;
   const uint16_t imm = (uint16_t)get_immd_u32(i->src[imm_slot]);

   pc->emit[0] = 0xd0000001 | (imm << 9);
   pc->emit[1] = 0x20000000;

   pc->emit[0] |= (DREG(i->def[0])->id + 1) << 2;

   set_pred(pc, i);

   if (!i->src[1])
      return;
   set_a16_bits(pc, SREG(i->src[1])->id + 1);
}

/* Emit a flow control instruction; flow_op is placed in the top 4 bits of
 * word 0. If the instruction has a target (and is not NV_OP_BREAK), the
 * target position is encoded and two code fixups are recorded so that both
 * words can be patched once the code base address is known.
 */
static void
emit_flow(struct nv_pc *pc, struct nv_instruction *i, ubyte flow_op)
{
   uint32_t target_pos;

   pc->emit[0] = 0x00000003 | (flow_op << 28);
   pc->emit[1] = 0x00000000;

   set_pred(pc, i);

   if (!i->target || i->opcode == NV_OP_BREAK)
      return;

   target_pos = i->target->bin_pos;

   new_fixup(pc, NV50_FIXUP_CODE_RELOC, 0, target_pos, 0xffff << 11, 9);
   new_fixup(pc, NV50_FIXUP_CODE_RELOC, 1, target_pos, 0x3f << 14, -4);

   pc->emit[0] |= (target_pos / 4) << 11;
}

/* Dispatch an add: address register destinations use emit_add_a16(),
 * otherwise the destination type selects the float or integer variant.
 * Other destination types emit nothing.
 */
static INLINE void
emit_add(struct nv_pc *pc, struct nv_instruction *i)
{
   if (DFILE(i, 0) == NV_FILE_ADDR) {
      emit_add_a16(pc, i);
      return;
   }

   switch (DTYPE(i, 0)) {
   case NV_TYPE_F32:
      emit_add_f32(pc, i);
      break;
   case NV_TYPE_S32:
   case NV_TYPE_U32:
      emit_add_b32(pc, i);
      break;
   }
}

/* Emit AND, OR or XOR, in immediate form if src[1] is an immediate and in
 * the default long form otherwise. AND is the base encoding; OR and XOR set
 * an additional selector bit whose position depends on the form.
 */
static void
emit_bitop2(struct nv_pc *pc, struct nv_instruction *i)
{
   const int is_or = (i->opcode == NV_OP_OR);
   const int is_xor = (i->opcode == NV_OP_XOR);

   pc->emit[0] = 0xd0000000;

   if (SFILE(i, 1) == NV_FILE_IMM) {
      emit_form_IMM(pc, i, 0);

      if (is_or)
         pc->emit[0] |= 0x0100;
      else if (is_xor)
         pc->emit[0] |= 0x8000;
      return;
   }

   emit_form_MAD(pc, i);

   pc->emit[1] |= 0x04000000;

   if (is_or)
      pc->emit[1] |= 0x4000;
   else if (is_xor)
      pc->emit[1] |= 0x8000;
}

/* Emit a shift whose result is written to an address register: GPR src[0]
 * shifted by the immediate in src[1] (both files asserted). The destination
 * id + 1 goes to bit 2 of word 0, the low 6 bits of the shift count to
 * bit 16. Writing flags is not supported (asserted).
 */
static void
emit_arl(struct nv_pc *pc, struct nv_instruction *i)
{
   assert(SFILE(i, 0) == NV_FILE_GPR);
   assert(SFILE(i, 1) == NV_FILE_IMM);

   assert(!i->flags_def);

   pc->emit[0] = 0x00000001 | ((i->def[0]->reg.id + 1) << 2);
   pc->emit[1] = 0xc0000000;

   set_pred(pc, i);
   set_src_0(pc, i->src[0]);

   pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x3f) << 16;
}

/* Emit SHL or SHR. Address register destinations are handed to emit_arl().
 * Otherwise bit 29 of word 1 selects SHR and bit 27 a signed (S32) source;
 * an immediate shift count is encoded in 7 bits at bit 16 of word 0.
 *
 * NOTE(review): the immediate path only encodes the count and predicate --
 * it does not call set_dst()/set_src_0(). Confirm that this is intended.
 */
static void
emit_shift(struct nv_pc *pc, struct nv_instruction *i)
{
   if (DFILE(i, 0) == NV_FILE_ADDR) {
      emit_arl(pc, i);
      return;
   }

   pc->emit[0] = 0x30000001;
   pc->emit[1] = 0xc4000000;

   if (i->opcode == NV_OP_SHR)
      pc->emit[1] |= 1 << 29;

   if (SFILE(i, 1) != NV_FILE_IMM) {
      emit_form_MAD(pc, i);
   } else {
      pc->emit[1] |= 1 << 20;
      pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x7f) << 16;

      set_pred(pc, i);
   }

   if (STYPE(i, 0) == NV_TYPE_S32)
      pc->emit[1] |= 1 << 27;
}

/* Emit a unary float operation (NV_OP_RCP and the opcodes following it) on
 * a F32 GPR source (asserted). The short form is only valid for RCP without
 * modifiers (asserted); in the long form the opcode's distance from
 * NV_OP_RCP selects the operation in the top bits of word 1 and NEG/ABS
 * modifiers are encoded.
 */
static void
emit_flop(struct nv_pc *pc, struct nv_instruction *i)
{
   struct nv_ref *src0 = i->src[0];

   pc->emit[0] = 0x90000000;

   assert(STYPE(i, 0) == NV_TYPE_F32);
   assert(SFILE(i, 0) == NV_FILE_GPR);

   if (i->is_long) {
      pc->emit[1] = (i->opcode - NV_OP_RCP) << 29;

      emit_form_MAD(pc, i);

      if (src0->mod & NV_MOD_NEG)
         pc->emit[1] |= 0x04000000;
      if (src0->mod & NV_MOD_ABS)
         pc->emit[1] |= 0x00100000;
   } else {
      emit_form_MUL(pc, i);
      assert(i->opcode == NV_OP_RCP && !src0->mod);
   }
}

/* Emit a 32 bit float multiply-add. The product is negated if exactly one
 * of src[0]/src[1] carries NEG, the addend if src[2] does. Negation and
 * saturation are only encoded in the long form; the short form requires
 * both negations to be absent (asserted).
 */
static void
emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i)
{
   const boolean negate_product = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG;
   const boolean negate_addend = (i->src[2]->mod & NV_MOD_NEG);

   pc->emit[0] = 0xe0000000;

   if (i->is_long) {
      emit_form_MAD(pc, i);

      if (negate_product)
         pc->emit[1] |= 0x04000000;
      if (negate_addend)
         pc->emit[1] |= 0x08000000;

      if (i->saturate)
         pc->emit[1] |= 0x20000000;
   } else {
      emit_form_MUL(pc, i);
      assert(!negate_product && !negate_addend);
   }
}

/* Emit a multiply-add; only the 32 bit float variant is implemented, so
 * every MAD is forwarded to emit_mad_f32().
 */
static INLINE void
emit_mad(struct nv_pc *pc, struct nv_instruction *i)
{
   emit_mad_f32(pc, i);
}

/* Emit a 32 bit float multiply in immediate, long or short form.
 * The result is negated if exactly one of the two sources carries NEG; the
 * bit used for that depends on the form.
 */
static void
emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i)
{
   const boolean negate = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG;
   const int has_imm = (SFILE(i, 1) == NV_FILE_IMM);

   pc->emit[0] = 0xc0000000;

   if (!has_imm && i->is_long) {
      emit_form_MAD(pc, i);

      if (negate)
         pc->emit[1] |= 0x08 << 24;
      return;
   }

   /* the immediate and short forms share the negate bit in word 0 */
   if (has_imm)
      emit_form_IMM(pc, i, 0);
   else
      emit_form_MUL(pc, i);

   if (negate)
      pc->emit[0] |= 0x8000;
}

static void
emit_set(struct nv_pc *pc, struct nv_instruction *nvi)
{
   assert(nvi->is_long);

   pc->emit[0] = 0x30000000;
   pc->emit[1] = 0x60000000;

   pc->emit[1] |= nvi->set_cond << 14;

   switch (STYPE(nvi, 0)) {
   case NV_TYPE_U32: pc->emit[1] |= 0x04000000; break;
   case NV_TYPE_S32: pc->emit[1] |= 0x0c000000; break;
   case NV_TYPE_F32: pc->emit[0] |= 0x80000000; break;
   default:
      assert(0);
      break;
   }

   emit_form_MAD(pc, nvi);
}

/* Bits OR'ed into word 1 of a conversion by emit_cvt(), selected by the
 * opcode (CEIL, FLOOR, TRUNC, SAT, ABS) or by an ABS source modifier.
 */
#define CVT_RN    (0x00 << 16)
#define CVT_FLOOR (0x02 << 16)
#define CVT_CEIL  (0x04 << 16)
#define CVT_TRUNC (0x06 << 16)
#define CVT_SAT   (0x08 << 16)
#define CVT_ABS   (0x10 << 16)

/* Base values of word 1 per type combination. emit_cvt() picks
 * CVT_<dst type>_<src type>; the two CVT_X32_* values are the common part
 * for unsigned/float and for signed sources, respectively.
 */
#define CVT_X32_X32 0x04004000
#define CVT_X32_S32 0x04014000
#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32)
#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32)
#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32)
#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32)
#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32)
#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32)
#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32)
#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32)
#define CVT_U32_U32 ((0x00 << 24) | CVT_X32_X32)

/* CVT_NEG: negate the result (NV_OP_NEG, toggled by a NEG source modifier).
 * CVT_RI: set by emit_cvt() for CEIL/FLOOR/TRUNC when converting F32 to F32.
 */
#define CVT_NEG 0x20000000
#define CVT_RI  0x08000000

static void
emit_cvt(struct nv_pc *pc, struct nv_instruction *nvi)
{
   ubyte dst_type = nvi->def[0] ? DTYPE(nvi, 0) : STYPE(nvi, 0);

   pc->emit[0] = 0xa0000000;

   switch (dst_type) {
   case NV_TYPE_F32:
      switch (STYPE(nvi, 0)) {
      case NV_TYPE_F32: pc->emit[1] = CVT_F32_F32; break;
      case NV_TYPE_S32: pc->emit[1] = CVT_F32_S32; break;
      case NV_TYPE_U32: pc->emit[1] = CVT_F32_U32; break;
      }
      break;
   case NV_TYPE_S32:
      switch (STYPE(nvi, 0)) {
      case NV_TYPE_F32: pc->emit[1] = CVT_S32_F32; break;
      case NV_TYPE_S32: pc->emit[1] = CVT_S32_S32; break;
      case NV_TYPE_U32: pc->emit[1] = CVT_S32_U32; break;
      }
      break;
   case NV_TYPE_U32:
      switch (STYPE(nvi, 0)) {
      case NV_TYPE_F32: pc->emit[1] = CVT_U32_F32; break;
      case NV_TYPE_S32: pc->emit[1] = CVT_U32_S32; break;
      case NV_TYPE_U32: pc->emit[1] = CVT_U32_U32; break;
      }
      break;
   }
   if (pc->emit[1] == CVT_F32_F32 &&
       (nvi->opcode == NV_OP_CEIL || nvi->opcode == NV_OP_FLOOR ||
	nvi->opcode == NV_OP_TRUNC))
       pc->emit[1] |= CVT_RI;

   switch (nvi->opcode) {
   case NV_OP_CEIL:  pc->emit[1] |= CVT_CEIL; break;
   case NV_OP_FLOOR: pc->emit[1] |= CVT_FLOOR; break;
   case NV_OP_TRUNC: pc->emit[1] |= CVT_TRUNC; break;

   case NV_OP_ABS: pc->emit[1] |= CVT_ABS; break;
   case NV_OP_SAT: pc->emit[1] |= CVT_SAT; break;
   case NV_OP_NEG: pc->emit[1] |= CVT_NEG; break;
   default:
      assert(nvi->opcode == NV_OP_CVT);
      break;
   }
   assert(nvi->opcode != NV_OP_ABS || !(nvi->src[0]->mod & NV_MOD_NEG));

   if (nvi->src[0]->mod & NV_MOD_NEG) pc->emit[1] ^= CVT_NEG;
   if (nvi->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= CVT_ABS;

   emit_form_MAD(pc, nvi);
}

/* Emit a texture instruction (TEX, TXB, TXL). The tex_t / tex_s indices,
 * the argument count minus one and the component mask are encoded from the
 * instruction's tex_* fields; the mask is split between both words.
 * tex_live, tex_cube and the TXB/TXL opcodes set additional bits.
 */
static void
emit_tex(struct nv_pc *pc, struct nv_instruction *i)
{
   pc->emit[0] = 0xf0000001;
   pc->emit[1] = 0x00000000;

   DID(pc, i->def[0], 2);

   set_pred(pc, i);

   /* word 0 fields */
   pc->emit[0] |= i->tex_t << 9;
   pc->emit[0] |= i->tex_s << 17;
   pc->emit[0] |= (i->tex_argc - 1) << 22;
   pc->emit[0] |= (i->tex_mask & 0x3) << 25;
   if (i->tex_cube)
      pc->emit[0] |= 0x08000000;

   /* word 1 fields */
   pc->emit[1] |= (i->tex_mask & 0xc) << 12;
   if (i->tex_live)
      pc->emit[1] |= 4;

   switch (i->opcode) {
   case NV_OP_TXB:
      pc->emit[1] |= 0x20000000;
      break;
   case NV_OP_TXL:
      pc->emit[1] |= 0x40000000;
      break;
   default:
      break;
   }
}

/* Emit the preparation instruction for SIN/COS or EX2 (NV_OP_PRESIN /
 * NV_OP_PREEX2) in the default long form; PREEX2 sets bit 14 of word 1.
 * NEG and ABS modifiers of src[0] are encoded in word 1.
 */
static void
emit_cvt2fixed(struct nv_pc *pc, struct nv_instruction *i)
{
   const ubyte src_mod = i->src[0]->mod;

   pc->emit[0] = 0xb0000000;
   pc->emit[1] = (i->opcode == NV_OP_PREEX2) ? (0xc0000000 | 0x4000)
                                             : 0xc0000000;

   emit_form_MAD(pc, i);

   if (src_mod & NV_MOD_NEG)
      pc->emit[1] |= 0x04000000;
   if (src_mod & NV_MOD_ABS)
      pc->emit[1] |= 0x00100000;
}

/* Emit the x derivative of GPR src[0]; long form only (asserted).
 * A NEG modifier on the source selects a different pair of opcode words.
 * The source id is encoded twice: at bit 9 of word 0 and bit 14 of word 1.
 */
static void
emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
{
   int negate;

   assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR);

   negate = (i->src[0]->mod & NV_MOD_NEG) != 0;

   if (negate) {
      pc->emit[0] = 0xc0240001;
      pc->emit[1] = 0x86400000;
   } else {
      pc->emit[0] = 0xc0140001;
      pc->emit[1] = 0x89800000;
   }

   DID(pc, i->def[0], 2);
   SID(pc, i->src[0], 9);
   SID(pc, i->src[0], 32 + 14);

   set_pred(pc, i);
   set_pred_wr(pc, i);
}

/* Emit the y derivative of GPR src[0]; long form only (asserted).
 * A NEG modifier on the source selects a different pair of opcode words.
 * The source id is encoded twice: at bit 9 of word 0 and bit 14 of word 1.
 */
static void
emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
{
   int negate;

   assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR);

   negate = (i->src[0]->mod & NV_MOD_NEG) != 0;

   if (negate) {
      pc->emit[0] = 0xc0250001;
      pc->emit[1] = 0x85800000;
   } else {
      pc->emit[0] = 0xc0150001;
      pc->emit[1] = 0x8a400000;
   }

   DID(pc, i->def[0], 2);
   SID(pc, i->src[0], 9);
   SID(pc, i->src[0], 32 + 14);

   set_pred(pc, i);
   set_pred_wr(pc, i);
}

/* Emit a quad operation using the ADD form. The lane mask goes to bit 16 of
 * word 0; the quadop value is split: its low 2 bits go to bit 20 of word 0,
 * the remaining bits to word 1.
 */
static void
emit_quadop(struct nv_pc *pc, struct nv_instruction *i)
{
   pc->emit[0] = 0xc0000000;
   pc->emit[1] = 0x80000000;

   emit_form_ADD(pc, i);

   pc->emit[0] |= (i->lanes << 16) | ((i->quadop & 0x03) << 20);
   pc->emit[1] |= (i->quadop & 0xfc) << 20;
}

void
nv50_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
{
   /* nv_print_instruction(i); */

   switch (i->opcode) {
   case NV_OP_MOV:
      if (DFILE(i, 0) == NV_FILE_ADDR)
         emit_add_a16(pc, i);
      else
         emit_mov(pc, i);
      break;
   case NV_OP_LDA:
      emit_mov(pc, i);
      break;
   case NV_OP_STA:
      emit_st(pc, i);
      break;
   case NV_OP_LINTERP:
   case NV_OP_PINTERP:
      emit_interp(pc, i);
      break;
   case NV_OP_ADD:
      emit_add(pc, i);
      break;
   case NV_OP_AND:
   case NV_OP_OR:
   case NV_OP_XOR:
      emit_bitop2(pc, i);
      break;
   case NV_OP_CVT:
   case NV_OP_ABS:
   case NV_OP_NEG:
   case NV_OP_SAT:
   case NV_OP_CEIL:
   case NV_OP_FLOOR:
   case NV_OP_TRUNC:
      emit_cvt(pc, i);
      break;
   case NV_OP_DFDX:
      emit_ddx(pc, i);
      break;
   case NV_OP_DFDY:
      emit_ddy(pc, i);
      break;
   case NV_OP_RCP:
   case NV_OP_RSQ:
   case NV_OP_LG2:
   case NV_OP_SIN:
   case NV_OP_COS:
   case NV_OP_EX2:
      emit_flop(pc, i);
      break;
   case NV_OP_PRESIN:
   case NV_OP_PREEX2:
      emit_cvt2fixed(pc, i);
      break;
   case NV_OP_MAD:
      emit_mad(pc, i);
      break;
   case NV_OP_MAX:
   case NV_OP_MIN:
      emit_minmax(pc, i);
      break;
   case NV_OP_MUL:
      emit_mul_f32(pc, i);
      break;
   case NV_OP_SET:
      emit_set(pc, i);
      break;
   case NV_OP_SHL:
   case NV_OP_SHR:
      emit_shift(pc, i);
      break;
   case NV_OP_TEX:
   case NV_OP_TXB:
   case NV_OP_TXL:
      emit_tex(pc, i);
      break;
   case NV_OP_QUADOP:
      emit_quadop(pc, i);
      break;
   case NV_OP_KIL:
      emit_flow(pc, i, 0x0);
      break;
   case NV_OP_BRA:
      emit_flow(pc, i, 0x1);
      break;
   case NV_OP_CALL:
      emit_flow(pc, i, 0x2);
      break;
   case NV_OP_RET:
      emit_flow(pc, i, 0x3);
      break;
   case NV_OP_BREAKADDR:
      emit_flow(pc, i, 0x4);
      break;
   case NV_OP_BREAK:
      emit_flow(pc, i, 0x5);
      break;
   case NV_OP_JOINAT:
      emit_flow(pc, i, 0xa);
      break;
   case NV_OP_NOP:
   case NV_OP_JOIN:
      pc->emit[0] = 0xf0000001;
      pc->emit[1] = 0xe0000000;
      break;
   case NV_OP_PHI:
   case NV_OP_UNDEF:
   case NV_OP_SUB:
      NOUVEAU_ERR("operation \"%s\" should have been eliminated\n",
                  nv_opcode_name(i->opcode));
      break;
   default:
      NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
      abort();
      break;
   }

   if (i->is_join) {
      assert(i->is_long && !(pc->emit[1] & 1));
      pc->emit[1] |= 2;
   }

   assert((pc->emit[0] & 1) == i->is_long);
}