/*
 * (C) Copyright IBM Corporation 2008
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \file
 * Run-time assembly generation interface for Cell B.E. SPEs.
 * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
 *
 * \author Ian Romanick <idr@us.ibm.com>
 */

#ifndef RTASM_PPC_SPE_H
#define RTASM_PPC_SPE_H

/** Size of one encoded SPE instruction, in bytes. */
#define SPE_INST_SIZE  4

/** Count of general-purpose SIMD registers. */
#define SPE_NUM_REGS   128

/** Register number holding the return address. */
#define SPE_REG_RA     0

/** Register number holding the stack pointer. */
#define SPE_REG_SP     1


/**
 * Book-keeping for one function's worth of SPE code being generated.
 *
 * This header does not include anything itself; the includer must make
 * uint32_t and uint64_t available (e.g. via <stdint.h>) beforehand.
 */
struct spe_function
{
   uint32_t *store;     /**< instruction buffer */

   /*
    * NOTE(review): presumably the number of instructions currently held in
    * \c store and the capacity of \c store, both counted in instructions;
    * confirm against the implementation file.
    */
   unsigned num_inst;
   unsigned max_inst;

   /**
    * Mask of used / unused registers
    *
    * Each set bit corresponds to an available register.  Each cleared bit
    * corresponds to an allocated register.
    *
    * \sa
    * spe_allocate_register, spe_allocate_available_register,
    * spe_release_register
    */
   uint64_t regs[SPE_NUM_REGS / 64];
};

/**
 * Initialize the function object \p p.
 *
 * \param p          function object to set up
 * \param code_size  requested code size
 *                   (NOTE(review): the unit -- bytes or instructions -- is
 *                   not visible from this header; confirm in the
 *                   implementation)
 */
extern void
spe_init_func(struct spe_function *p, unsigned code_size);

/**
 * Counterpart of spe_init_func for a function object that is no longer
 * needed.
 */
extern void
spe_release_func(struct spe_function *p);

/*
 * Register allocation helpers.  They are the routines referenced by the
 * documentation of spe_function::regs.
 *
 * NOTE(review): the meaning of the int return values (presumably a register
 * number, with some sentinel on failure) is not visible here; confirm in
 * the implementation.
 */
extern int
spe_allocate_available_register(struct spe_function *p);

extern int
spe_allocate_register(struct spe_function *p, int reg);

extern void
spe_release_register(struct spe_function *p, int reg);

#endif /* RTASM_PPC_SPE_H */

/*
 * Default definitions of the instruction-table macros.
 *
 * The instruction lists that follow are written in terms of EMIT_* macros.
 * If the includer has not defined EMIT_ beforehand, the macros below turn
 * every table entry into a plain function prototype:
 *
 *   _name  identifier of the function to declare
 *   _op    second table column; not used by these prototype-only
 *          definitions (presumably the opcode, for includers that supply
 *          their own EMIT_* macros)
 *
 * UNDEF_EMIT_MACROS records that the macros were defined here, so that the
 * end of the header can remove them again.
 */
#ifndef EMIT_
#define EMIT_(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT)
#define EMIT_R(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA)
#define EMIT_RR(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       unsigned rB)
#define EMIT_RRR(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       unsigned rB, unsigned rC)
#define EMIT_RI7(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       int imm)
#define EMIT_RI8(_name, _op, _bias) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       int imm)
#define EMIT_RI10(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       int imm)
#define EMIT_RI16(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, int imm)
#define EMIT_RI18(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, int imm)
#define EMIT_I16(_name, _op) \
    extern void _name (struct spe_function *p, int imm)
#define UNDEF_EMIT_MACROS
#endif /* EMIT_ */


/* Memory load / store instructions
 *
 * Each entry is (function name, opcode); the EMIT_* macro selects the
 * operand layout of the generated function.
 *
 * The register-indexed insertion-control generators (spe_cbx, spe_chx,
 * spe_cwx, spe_cdx) all take a second source register and are therefore
 * RR-format; only the "d" variants carry a 7-bit immediate.
 */
EMIT_RI10(spe_lqd,  0x034);
EMIT_RR  (spe_lqx,  0x1c4);
EMIT_RI16(spe_lqa,  0x061);
EMIT_RI16(spe_lqr,  0x067);
EMIT_RI10(spe_stqd, 0x024);
EMIT_RR  (spe_stqx, 0x144);
EMIT_RI16(spe_stqa, 0x041);
EMIT_RI16(spe_stqr, 0x047);
EMIT_RI7 (spe_cbd,  0x1f4);
EMIT_RR  (spe_cbx,  0x1d4);
EMIT_RI7 (spe_chd,  0x1f5);
EMIT_RR  (spe_chx,  0x1d5);
EMIT_RI7 (spe_cwd,  0x1f6);
EMIT_RR  (spe_cwx,  0x1d6);
EMIT_RI7 (spe_cdd,  0x1f7);
EMIT_RR  (spe_cdx,  0x1d7);


/* Constant formation instructions
 *
 * Each entry is (function name, opcode).  With the prototype-generating
 * EMIT_RI16 / EMIT_RI18 macros of this header, every function listed here
 * takes (p, rT, imm): a target register and an immediate operand.
 */
EMIT_RI16(spe_ilh,   0x083);
EMIT_RI16(spe_ilhu,  0x082);
EMIT_RI16(spe_il,    0x081);
EMIT_RI18(spe_ila,   0x021);
EMIT_RI16(spe_iohl,  0x0c1);
EMIT_RI16(spe_fsmbi, 0x065);



/* Integer and logical instructions
 *
 * Each entry is (function name, opcode); the EMIT_* macro selects the
 * operand layout of the generated function (R: rT, rA; RR: rT, rA, rB;
 * RRR: rT, rA, rB, rC; RI10: rT, rA, imm).
 *
 * The XOR immediate forms use opcodes 0x044..0x046.  They must not reuse
 * 0x024, which is the RI10 opcode of spe_stqd.
 */
EMIT_RR  (spe_ah,      0x0c8);
EMIT_RI10(spe_ahi,     0x01d);
EMIT_RR  (spe_a,       0x0c0);
EMIT_RI10(spe_ai,      0x01c);
EMIT_RR  (spe_sfh,     0x048);
EMIT_RI10(spe_sfhi,    0x00d);
EMIT_RR  (spe_sf,      0x040);
EMIT_RI10(spe_sfi,     0x00c);
EMIT_RR  (spe_addx,    0x340);
EMIT_RR  (spe_cg,      0x0c2);
EMIT_RR  (spe_cgx,     0x342);
EMIT_RR  (spe_sfx,     0x341);
EMIT_RR  (spe_bg,      0x042);
EMIT_RR  (spe_bgx,     0x343);
EMIT_RR  (spe_mpy,     0x3c4);
EMIT_RR  (spe_mpyu,    0x3cc);
EMIT_RI10(spe_mpyi,    0x074);
EMIT_RI10(spe_mpyui,   0x075);
EMIT_RRR (spe_mpya,    0x00c);
EMIT_RR  (spe_mpyh,    0x3c5);
EMIT_RR  (spe_mpys,    0x3c7);
EMIT_RR  (spe_mpyhh,   0x3c6);
EMIT_RR  (spe_mpyhha,  0x346);
EMIT_RR  (spe_mpyhhu,  0x3ce);
EMIT_RR  (spe_mpyhhau, 0x34e);
EMIT_R   (spe_clz,     0x2a5);
EMIT_R   (spe_cntb,    0x2b4);
EMIT_R   (spe_fsmb,    0x1b6);
EMIT_R   (spe_fsmh,    0x1b5);
EMIT_R   (spe_fsm,     0x1b4);
EMIT_R   (spe_gbb,     0x1b2);
EMIT_R   (spe_gbh,     0x1b1);
EMIT_R   (spe_gb,      0x1b0);
EMIT_RR  (spe_avgb,    0x0d3);
EMIT_RR  (spe_absdb,   0x053);
EMIT_RR  (spe_sumb,    0x253);
EMIT_R   (spe_xsbh,    0x2b6);
EMIT_R   (spe_xshw,    0x2ae);
EMIT_R   (spe_xswd,    0x2a6);
EMIT_RR  (spe_and,     0x0c1);
EMIT_RR  (spe_andc,    0x2c1);
EMIT_RI10(spe_andbi,   0x016);
EMIT_RI10(spe_andhi,   0x015);
EMIT_RI10(spe_andi,    0x014);
EMIT_RR  (spe_or,      0x041);
EMIT_RR  (spe_orc,     0x2c9);
EMIT_RI10(spe_orbi,    0x006);
EMIT_RI10(spe_orhi,    0x005);
EMIT_RI10(spe_ori,     0x004);
EMIT_R   (spe_orx,     0x1f0);
EMIT_RR  (spe_xor,     0x241);
EMIT_RI10(spe_xorbi,   0x046);
EMIT_RI10(spe_xorhi,   0x045);
EMIT_RI10(spe_xori,    0x044);
EMIT_RR  (spe_nand,    0x0c9);
EMIT_RR  (spe_nor,     0x049);
EMIT_RR  (spe_eqv,     0x249);
EMIT_RRR (spe_selb,    0x008);
EMIT_RRR (spe_shufb,   0x00b);


/* Shift and rotate instructions
 *
 * Each entry is (function name, opcode).  RR entries take the count from
 * register rB, RI7 entries take it from a 7-bit immediate.
 *
 * The quadword-by-bits opcodes follow the pattern rotqbi 0x1d8,
 * rotqmbi 0x1d9, shlqbi 0x1db, mirrored by the immediate forms
 * 0x1f8 / 0x1f9 / 0x1fb.
 */
EMIT_RR  (spe_shlh,      0x05f);
EMIT_RI7 (spe_shlhi,     0x07f);
EMIT_RR  (spe_shl,       0x05b);
EMIT_RI7 (spe_shli,      0x07b);
EMIT_RR  (spe_shlqbi,    0x1db);
EMIT_RI7 (spe_shlqbii,   0x1fb);
EMIT_RR  (spe_shlqby,    0x1df);
EMIT_RI7 (spe_shlqbyi,   0x1ff);
EMIT_RR  (spe_shlqbybi,  0x1cf);
EMIT_RR  (spe_roth,      0x05c);
EMIT_RI7 (spe_rothi,     0x07c);
EMIT_RR  (spe_rot,       0x058);
EMIT_RI7 (spe_roti,      0x078);
EMIT_RR  (spe_rotqby,    0x1dc);
EMIT_RI7 (spe_rotqbyi,   0x1fc);
EMIT_RR  (spe_rotqbybi,  0x1cc);
EMIT_RR  (spe_rotqbi,    0x1d8);
EMIT_RI7 (spe_rotqbii,   0x1f8);
EMIT_RR  (spe_rothm,     0x05d);
EMIT_RI7 (spe_rothmi,    0x07d);
EMIT_RR  (spe_rotm,      0x059);
EMIT_RI7 (spe_rotmi,     0x079);
EMIT_RR  (spe_rotqmby,   0x1dd);
EMIT_RI7 (spe_rotqmbyi,  0x1fd);
EMIT_RR  (spe_rotqmbybi, 0x1cd);
EMIT_RR  (spe_rotqmbi,   0x1d9);
EMIT_RI7 (spe_rotqmbii,  0x1f9);
EMIT_RR  (spe_rotmah,    0x05e);
EMIT_RI7 (spe_rotmahi,   0x07e);
EMIT_RR  (spe_rotma,     0x05a);
EMIT_RI7 (spe_rotmai,    0x07a);


/* Compare, branch, and halt instructions
 *
 * Each entry is (function name, opcode).  With the prototype-generating
 * macros of this header:
 *   EMIT_RR    declares name(p, rT, rA, rB)
 *   EMIT_RI10  declares name(p, rT, rA, imm)
 *   EMIT_RI16  declares name(p, rT, imm)
 *   EMIT_I16   declares name(p, imm)
 */
EMIT_RR  (spe_heq,       0x3d8);
EMIT_RI10(spe_heqi,      0x07f);
EMIT_RR  (spe_hgt,       0x258);
EMIT_RI10(spe_hgti,      0x04f);
EMIT_RR  (spe_hlgt,      0x2d8);
EMIT_RI10(spe_hlgti,     0x05f);
EMIT_RR  (spe_ceqb,      0x3d0);
EMIT_RI10(spe_ceqbi,     0x07e);
EMIT_RR  (spe_ceqh,      0x3c8);
EMIT_RI10(spe_ceqhi,     0x07d);
EMIT_RR  (spe_ceq,       0x3c0);
EMIT_RI10(spe_ceqi,      0x07c);
EMIT_RR  (spe_cgtb,      0x250);
EMIT_RI10(spe_cgtbi,     0x04e);
EMIT_RR  (spe_cgth,      0x248);
EMIT_RI10(spe_cgthi,     0x04d);
EMIT_RR  (spe_cgt,       0x240);
EMIT_RI10(spe_cgti,      0x04c);
EMIT_RR  (spe_clgtb,     0x2d0);
EMIT_RI10(spe_clgtbi,    0x05e);
EMIT_RR  (spe_clgth,     0x2c8);
EMIT_RI10(spe_clgthi,    0x05d);
EMIT_RR  (spe_clgt,      0x2c0);
EMIT_RI10(spe_clgti,     0x05c);
EMIT_I16 (spe_br,        0x064);
EMIT_I16 (spe_bra,       0x060);
EMIT_RI16(spe_brsl,      0x066);
EMIT_RI16(spe_brasl,     0x062);
EMIT_RI16(spe_brnz,      0x042);
EMIT_RI16(spe_brz,       0x040);
EMIT_RI16(spe_brhnz,     0x046);
EMIT_RI16(spe_brhz,      0x044);

/*
 * Indirect-branch style generators.  These are declared by hand instead of
 * through the EMIT_* tables because they take the two extra integer
 * arguments \c d and \c e.
 *
 * NOTE(review): \c d and \c e are presumably the interrupt disable / enable
 * feature bits of the SPU indirect-branch encodings; confirm against the
 * ISA document and the implementation.
 */
extern void
spe_bi(struct spe_function *p, unsigned rA, int d, int e);

extern void
spe_iret(struct spe_function *p, unsigned rA, int d, int e);

extern void
spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, int e);

extern void
spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, int e);

extern void
spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e);

extern void
spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e);

extern void
spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e);

extern void
spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e);


/**
 * Load the immediate float \p x into register \p rT, splatted.
 *
 * \param p   function being generated
 * \param rT  target register
 * \param x   float value to load
 */
extern void
spe_load_float(struct spe_function *p, unsigned rT, float x);

/**
 * Load the immediate int \p i into register \p rT, splatted.
 *
 * \param p   function being generated
 * \param rT  target register
 * \param i   integer value to load
 */
extern void
spe_load_int(struct spe_function *p, unsigned rT, int i);

/**
 * Replicate word 0 of \p rA across \p rT.
 *
 * \param p   function being generated
 * \param rT  target register
 * \param rA  source register
 */
extern void
spe_splat(struct spe_function *p, unsigned rT, unsigned rA);

/**
 * Complement (invert) all bits in \p rT.
 *
 * \param p   function being generated
 * \param rT  register to invert
 */
extern void
spe_complement(struct spe_function *p, unsigned rT);

/**
 * Register move: rT = rA.
 *
 * \param p   function being generated
 * \param rT  target register
 * \param rA  source register
 */
extern void
spe_move(struct spe_function *p, unsigned rT, unsigned rA);

/**
 * Clear a register: rT = {0,0,0,0}.
 *
 * \param p   function being generated
 * \param rT  register to clear
 */
extern void
spe_zero(struct spe_function *p, unsigned rT);


/* Floating-point instructions
 *
 * Each entry is (function name, opcode).  EMIT_RI8 entries carry a third
 * column (the "bias" macro parameter); the prototype-generating macros of
 * this header do not use it.
 *
 * spe_fscrrd is the only entry using EMIT_, i.e. the form whose generated
 * function takes just (p, rT).
 */
EMIT_RR  (spe_fa,         0x2c4);
EMIT_RR  (spe_dfa,        0x2cc);
EMIT_RR  (spe_fs,         0x2c5);
EMIT_RR  (spe_dfs,        0x2cd);
EMIT_RR  (spe_fm,         0x2c6);
EMIT_RR  (spe_dfm,        0x2ce);
EMIT_RRR (spe_fma,        0x00e);
EMIT_RR  (spe_dfma,       0x35c);
EMIT_RRR (spe_fnms,       0x00d);
EMIT_RR  (spe_dfnms,      0x35e);
EMIT_RRR (spe_fms,        0x00f);
EMIT_RR  (spe_dfms,       0x35d);
EMIT_RR  (spe_dfnma,      0x35f);
EMIT_R   (spe_frest,      0x1b8);
EMIT_R   (spe_frsqest,    0x1b9);
EMIT_RR  (spe_fi,         0x3d4);
EMIT_RI8 (spe_csflt,      0x1da, 155);
EMIT_RI8 (spe_cflts,      0x1d8, 173);
EMIT_RI8 (spe_cuflt,      0x1db, 155);
EMIT_RI8 (spe_cfltu,      0x1d9, 173);
EMIT_R   (spe_frds,       0x3b9);
EMIT_R   (spe_fesd,       0x3b8);
EMIT_RR  (spe_dfceq,      0x3c3);
EMIT_RR  (spe_dfcmeq,     0x3cb);
EMIT_RR  (spe_dfcgt,      0x2c3);
EMIT_RR  (spe_dfcmgt,     0x2cb);
EMIT_RI7 (spe_dftsv,      0x3bf);
EMIT_RR  (spe_fceq,       0x3c2);
EMIT_RR  (spe_fcmeq,      0x3ca);
EMIT_RR  (spe_fcgt,       0x2c2);
EMIT_RR  (spe_fcmgt,      0x2ca);
EMIT_R   (spe_fscrwr,     0x3ba);
EMIT_    (spe_fscrrd,     0x398);


/* Channel instructions
 *
 * Each entry is (function name, opcode).  All three use EMIT_R, so with the
 * prototype-generating macros of this header each function takes
 * (p, rT, rA).
 */
EMIT_R   (spe_rdch,       0x00d);
EMIT_R   (spe_rdchcnt,    0x00f);
EMIT_R   (spe_wrch,       0x10d);


/*
 * Remove the EMIT_* macros again, but only when this header supplied them
 * itself (signalled by UNDEF_EMIT_MACROS).  Macros provided by the includer
 * are left untouched.
 */
#ifdef UNDEF_EMIT_MACROS
#  undef EMIT_
#  undef EMIT_R
#  undef EMIT_RR
#  undef EMIT_RRR
#  undef EMIT_RI7
#  undef EMIT_RI8
#  undef EMIT_RI10
#  undef EMIT_RI16
#  undef EMIT_RI18
#  undef EMIT_I16
#  undef UNDEF_EMIT_MACROS
#endif /* UNDEF_EMIT_MACROS */