diff options
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt.c | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc.c | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc.h | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 7 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 380 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 65 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/p_debug.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_math.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_math.h | 6 |
11 files changed, 285 insertions, 212 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 3c175f31d8..18f24e5980 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -35,6 +35,7 @@ #include "draw/draw_pt.h" #include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" +#include "util/u_math.h" static unsigned trim( unsigned count, unsigned first, unsigned incr ) { diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 6d11263be8..b65bfa7bbd 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -251,7 +251,7 @@ union vx_inst { } inst; }; -static inline void +static INLINE void emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) { union vx_inst inst; @@ -276,7 +276,7 @@ union vxr_inst { } inst; }; -static inline void +static INLINE void emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) { union vxr_inst inst; @@ -302,7 +302,7 @@ union va_inst { } inst; }; -static inline void +static INLINE void emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) { union va_inst inst; @@ -419,7 +419,7 @@ union d_inst { } inst; }; -static inline void +static INLINE void emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) { union d_inst inst; @@ -446,7 +446,7 @@ union a_inst { } inst; }; -static inline void +static INLINE void emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, uint rc) { diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index afb4704c39..08212a2a25 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -245,6 +245,9 @@ extern void ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm); extern void +ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb); extern void @@ -310,6 +313,9 @@ ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset); extern void ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb); +extern void +ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset); + /** diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index f8568f690b..1bd9f1c8dd 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -958,9 +958,12 @@ spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsig void spe_splat(struct spe_function *p, unsigned rT, unsigned rA) { + /* Use a temporary, just in case rT == rA */ + unsigned int tmp_reg = spe_allocate_available_register(p); /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ - spe_ila(p, rT, 0x00010203); - spe_shufb(p, rT, rA, rA, rT); + spe_ila(p, tmp_reg, 0x00010203); + spe_shufb(p, rT, rA, rA, tmp_reg); + spe_release_register(p, tmp_reg); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index d6a3c02f20..f1500cef29 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -100,199 +100,199 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); #endif /* RTASM_PPC_SPE_H */ #ifndef EMIT_ -#define EMIT_(name, _op) \ - extern void _name (struct spe_function *p, unsigned rT) +#define EMIT_(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT); #define EMIT_R(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA) + extern void _name (struct spe_function *p, unsigned rT, unsigned rA); #define EMIT_RR(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB) + unsigned rB); #define EMIT_RRR(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB, unsigned rC) + unsigned rB, unsigned rC); #define EMIT_RI7(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + int imm); #define EMIT_RI8(_name, _op, bias) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + int imm); #define EMIT_RI10(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + int imm); #define EMIT_RI10s(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + int imm); #define EMIT_RI16(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) + extern void _name (struct spe_function *p, unsigned rT, int imm); #define EMIT_RI18(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) + extern void _name (struct spe_function *p, unsigned rT, int imm); #define EMIT_I16(_name, _op) \ - extern void _name (struct spe_function *p, int imm) + extern void _name (struct spe_function *p, int imm); #define UNDEF_EMIT_MACROS #endif /* EMIT_ */ /* Memory load / store instructions */ -EMIT_RR (spe_lqx, 0x1c4); -EMIT_RI16(spe_lqa, 0x061); -EMIT_RI16(spe_lqr, 0x067); -EMIT_RR (spe_stqx, 0x144); -EMIT_RI16(spe_stqa, 0x041); -EMIT_RI16(spe_stqr, 0x047); -EMIT_RI7 (spe_cbd, 0x1f4); -EMIT_RR (spe_cbx, 0x1d4); -EMIT_RI7 (spe_chd, 0x1f5); -EMIT_RI7 (spe_chx, 0x1d5); -EMIT_RI7 (spe_cwd, 0x1f6); -EMIT_RI7 (spe_cwx, 0x1d6); -EMIT_RI7 (spe_cdd, 0x1f7); -EMIT_RI7 (spe_cdx, 0x1d7); +EMIT_RR (spe_lqx, 0x1c4) +EMIT_RI16(spe_lqa, 0x061) +EMIT_RI16(spe_lqr, 0x067) +EMIT_RR (spe_stqx, 0x144) +EMIT_RI16(spe_stqa, 0x041) +EMIT_RI16(spe_stqr, 0x047) +EMIT_RI7 (spe_cbd, 0x1f4) +EMIT_RR (spe_cbx, 0x1d4) +EMIT_RI7 (spe_chd, 0x1f5) +EMIT_RI7 (spe_chx, 0x1d5) +EMIT_RI7 (spe_cwd, 0x1f6) +EMIT_RI7 (spe_cwx, 0x1d6) +EMIT_RI7 (spe_cdd, 0x1f7) +EMIT_RI7 (spe_cdx, 0x1d7) /* Constant formation instructions */ -EMIT_RI16(spe_ilh, 0x083); -EMIT_RI16(spe_ilhu, 0x082); -EMIT_RI16(spe_il, 0x081); -EMIT_RI18(spe_ila, 0x021); -EMIT_RI16(spe_iohl, 0x0c1); -EMIT_RI16(spe_fsmbi, 0x065); +EMIT_RI16(spe_ilh, 0x083) +EMIT_RI16(spe_ilhu, 0x082) +EMIT_RI16(spe_il, 0x081) +EMIT_RI18(spe_ila, 0x021) +EMIT_RI16(spe_iohl, 0x0c1) +EMIT_RI16(spe_fsmbi, 0x065) /* Integer and logical instructions */ -EMIT_RR (spe_ah, 0x0c8); -EMIT_RI10(spe_ahi, 0x01d); -EMIT_RR (spe_a, 0x0c0); -EMIT_RI10s(spe_ai, 0x01c); -EMIT_RR (spe_sfh, 0x048); -EMIT_RI10(spe_sfhi, 0x00d); -EMIT_RR (spe_sf, 0x040); -EMIT_RI10(spe_sfi, 0x00c); -EMIT_RR (spe_addx, 0x340); -EMIT_RR (spe_cg, 0x0c2); -EMIT_RR (spe_cgx, 0x342); -EMIT_RR (spe_sfx, 0x341); -EMIT_RR (spe_bg, 0x042); -EMIT_RR (spe_bgx, 0x343); -EMIT_RR (spe_mpy, 0x3c4); -EMIT_RR (spe_mpyu, 0x3cc); -EMIT_RI10(spe_mpyi, 0x074); -EMIT_RI10(spe_mpyui, 0x075); -EMIT_RRR (spe_mpya, 0x00c); -EMIT_RR (spe_mpyh, 0x3c5); -EMIT_RR (spe_mpys, 0x3c7); -EMIT_RR (spe_mpyhh, 0x3c6); -EMIT_RR (spe_mpyhha, 0x346); -EMIT_RR (spe_mpyhhu, 0x3ce); -EMIT_RR (spe_mpyhhau, 0x34e); -EMIT_R (spe_clz, 0x2a5); -EMIT_R (spe_cntb, 0x2b4); -EMIT_R (spe_fsmb, 0x1b6); -EMIT_R (spe_fsmh, 0x1b5); -EMIT_R (spe_fsm, 0x1b4); -EMIT_R (spe_gbb, 0x1b2); -EMIT_R (spe_gbh, 0x1b1); -EMIT_R (spe_gb, 0x1b0); -EMIT_RR (spe_avgb, 0x0d3); -EMIT_RR (spe_absdb, 0x053); -EMIT_RR (spe_sumb, 0x253); -EMIT_R (spe_xsbh, 0x2b6); -EMIT_R (spe_xshw, 0x2ae); -EMIT_R (spe_xswd, 0x2a6); -EMIT_RR (spe_and, 0x0c1); -EMIT_RR (spe_andc, 0x2c1); -EMIT_RI10s(spe_andbi, 0x016); -EMIT_RI10s(spe_andhi, 0x015); -EMIT_RI10s(spe_andi, 0x014); -EMIT_RR (spe_or, 0x041); -EMIT_RR (spe_orc, 0x2c9); -EMIT_RI10s(spe_orbi, 0x006); -EMIT_RI10s(spe_orhi, 0x005); -EMIT_RI10s(spe_ori, 0x004); -EMIT_R (spe_orx, 0x1f0); -EMIT_RR (spe_xor, 0x241); -EMIT_RI10s(spe_xorbi, 0x026); -EMIT_RI10s(spe_xorhi, 0x025); -EMIT_RI10s(spe_xori, 0x024); -EMIT_RR (spe_nand, 0x0c9); -EMIT_RR (spe_nor, 0x049); -EMIT_RR (spe_eqv, 0x249); -EMIT_RRR (spe_selb, 0x008); -EMIT_RRR (spe_shufb, 0x00b); +EMIT_RR (spe_ah, 0x0c8) +EMIT_RI10(spe_ahi, 0x01d) +EMIT_RR (spe_a, 0x0c0) +EMIT_RI10s(spe_ai, 0x01c) +EMIT_RR (spe_sfh, 0x048) +EMIT_RI10(spe_sfhi, 0x00d) +EMIT_RR (spe_sf, 0x040) +EMIT_RI10(spe_sfi, 0x00c) +EMIT_RR (spe_addx, 0x340) +EMIT_RR (spe_cg, 0x0c2) +EMIT_RR (spe_cgx, 0x342) +EMIT_RR (spe_sfx, 0x341) +EMIT_RR (spe_bg, 0x042) +EMIT_RR (spe_bgx, 0x343) +EMIT_RR (spe_mpy, 0x3c4) +EMIT_RR (spe_mpyu, 0x3cc) +EMIT_RI10(spe_mpyi, 0x074) +EMIT_RI10(spe_mpyui, 0x075) +EMIT_RRR (spe_mpya, 0x00c) +EMIT_RR (spe_mpyh, 0x3c5) +EMIT_RR (spe_mpys, 0x3c7) +EMIT_RR (spe_mpyhh, 0x3c6) +EMIT_RR (spe_mpyhha, 0x346) +EMIT_RR (spe_mpyhhu, 0x3ce) +EMIT_RR (spe_mpyhhau, 0x34e) +EMIT_R (spe_clz, 0x2a5) +EMIT_R (spe_cntb, 0x2b4) +EMIT_R (spe_fsmb, 0x1b6) +EMIT_R (spe_fsmh, 0x1b5) +EMIT_R (spe_fsm, 0x1b4) +EMIT_R (spe_gbb, 0x1b2) +EMIT_R (spe_gbh, 0x1b1) +EMIT_R (spe_gb, 0x1b0) +EMIT_RR (spe_avgb, 0x0d3) +EMIT_RR (spe_absdb, 0x053) +EMIT_RR (spe_sumb, 0x253) +EMIT_R (spe_xsbh, 0x2b6) +EMIT_R (spe_xshw, 0x2ae) +EMIT_R (spe_xswd, 0x2a6) +EMIT_RR (spe_and, 0x0c1) +EMIT_RR (spe_andc, 0x2c1) +EMIT_RI10s(spe_andbi, 0x016) +EMIT_RI10s(spe_andhi, 0x015) +EMIT_RI10s(spe_andi, 0x014) +EMIT_RR (spe_or, 0x041) +EMIT_RR (spe_orc, 0x2c9) +EMIT_RI10s(spe_orbi, 0x006) +EMIT_RI10s(spe_orhi, 0x005) +EMIT_RI10s(spe_ori, 0x004) +EMIT_R (spe_orx, 0x1f0) +EMIT_RR (spe_xor, 0x241) +EMIT_RI10s(spe_xorbi, 0x026) +EMIT_RI10s(spe_xorhi, 0x025) +EMIT_RI10s(spe_xori, 0x024) +EMIT_RR (spe_nand, 0x0c9) +EMIT_RR (spe_nor, 0x049) +EMIT_RR (spe_eqv, 0x249) +EMIT_RRR (spe_selb, 0x008) +EMIT_RRR (spe_shufb, 0x00b) /* Shift and rotate instructions */ -EMIT_RR (spe_shlh, 0x05f); -EMIT_RI7 (spe_shlhi, 0x07f); -EMIT_RR (spe_shl, 0x05b); -EMIT_RI7 (spe_shli, 0x07b); -EMIT_RR (spe_shlqbi, 0x1db); -EMIT_RI7 (spe_shlqbii, 0x1fb); -EMIT_RR (spe_shlqby, 0x1df); -EMIT_RI7 (spe_shlqbyi, 0x1ff); -EMIT_RR (spe_shlqbybi, 0x1cf); -EMIT_RR (spe_roth, 0x05c); -EMIT_RI7 (spe_rothi, 0x07c); -EMIT_RR (spe_rot, 0x058); -EMIT_RI7 (spe_roti, 0x078); -EMIT_RR (spe_rotqby, 0x1dc); -EMIT_RI7 (spe_rotqbyi, 0x1fc); -EMIT_RR (spe_rotqbybi, 0x1cc); -EMIT_RR (spe_rotqbi, 0x1d8); -EMIT_RI7 (spe_rotqbii, 0x1f8); -EMIT_RR (spe_rothm, 0x05d); -EMIT_RI7 (spe_rothmi, 0x07d); -EMIT_RR (spe_rotm, 0x059); -EMIT_RI7 (spe_rotmi, 0x079); -EMIT_RR (spe_rotqmby, 0x1dd); -EMIT_RI7 (spe_rotqmbyi, 0x1fd); -EMIT_RR (spe_rotqmbybi, 0x1cd); -EMIT_RR (spe_rotqmbi, 0x1c9); -EMIT_RI7 (spe_rotqmbii, 0x1f9); -EMIT_RR (spe_rotmah, 0x05e); -EMIT_RI7 (spe_rotmahi, 0x07e); -EMIT_RR (spe_rotma, 0x05a); -EMIT_RI7 (spe_rotmai, 0x07a); +EMIT_RR (spe_shlh, 0x05f) +EMIT_RI7 (spe_shlhi, 0x07f) +EMIT_RR (spe_shl, 0x05b) +EMIT_RI7 (spe_shli, 0x07b) +EMIT_RR (spe_shlqbi, 0x1db) +EMIT_RI7 (spe_shlqbii, 0x1fb) +EMIT_RR (spe_shlqby, 0x1df) +EMIT_RI7 (spe_shlqbyi, 0x1ff) +EMIT_RR (spe_shlqbybi, 0x1cf) +EMIT_RR (spe_roth, 0x05c) +EMIT_RI7 (spe_rothi, 0x07c) +EMIT_RR (spe_rot, 0x058) +EMIT_RI7 (spe_roti, 0x078) +EMIT_RR (spe_rotqby, 0x1dc) +EMIT_RI7 (spe_rotqbyi, 0x1fc) +EMIT_RR (spe_rotqbybi, 0x1cc) +EMIT_RR (spe_rotqbi, 0x1d8) +EMIT_RI7 (spe_rotqbii, 0x1f8) +EMIT_RR (spe_rothm, 0x05d) +EMIT_RI7 (spe_rothmi, 0x07d) +EMIT_RR (spe_rotm, 0x059) +EMIT_RI7 (spe_rotmi, 0x079) +EMIT_RR (spe_rotqmby, 0x1dd) +EMIT_RI7 (spe_rotqmbyi, 0x1fd) +EMIT_RR (spe_rotqmbybi, 0x1cd) +EMIT_RR (spe_rotqmbi, 0x1c9) +EMIT_RI7 (spe_rotqmbii, 0x1f9) +EMIT_RR (spe_rotmah, 0x05e) +EMIT_RI7 (spe_rotmahi, 0x07e) +EMIT_RR (spe_rotma, 0x05a) +EMIT_RI7 (spe_rotmai, 0x07a) /* Compare, branch, and halt instructions */ -EMIT_RR (spe_heq, 0x3d8); -EMIT_RI10(spe_heqi, 0x07f); -EMIT_RR (spe_hgt, 0x258); -EMIT_RI10(spe_hgti, 0x04f); -EMIT_RR (spe_hlgt, 0x2d8); -EMIT_RI10(spe_hlgti, 0x05f); -EMIT_RR (spe_ceqb, 0x3d0); -EMIT_RI10(spe_ceqbi, 0x07e); -EMIT_RR (spe_ceqh, 0x3c8); -EMIT_RI10(spe_ceqhi, 0x07d); -EMIT_RR (spe_ceq, 0x3c0); -EMIT_RI10(spe_ceqi, 0x07c); -EMIT_RR (spe_cgtb, 0x250); -EMIT_RI10(spe_cgtbi, 0x04e); -EMIT_RR (spe_cgth, 0x248); -EMIT_RI10(spe_cgthi, 0x04d); -EMIT_RR (spe_cgt, 0x240); -EMIT_RI10(spe_cgti, 0x04c); -EMIT_RR (spe_clgtb, 0x2d0); -EMIT_RI10(spe_clgtbi, 0x05e); -EMIT_RR (spe_clgth, 0x2c8); -EMIT_RI10(spe_clgthi, 0x05d); -EMIT_RR (spe_clgt, 0x2c0); -EMIT_RI10(spe_clgti, 0x05c); -EMIT_I16 (spe_br, 0x064); -EMIT_I16 (spe_bra, 0x060); -EMIT_RI16(spe_brsl, 0x066); -EMIT_RI16(spe_brasl, 0x062); -EMIT_RI16(spe_brnz, 0x042); -EMIT_RI16(spe_brz, 0x040); -EMIT_RI16(spe_brhnz, 0x046); -EMIT_RI16(spe_brhz, 0x044); +EMIT_RR (spe_heq, 0x3d8) +EMIT_RI10(spe_heqi, 0x07f) +EMIT_RR (spe_hgt, 0x258) +EMIT_RI10(spe_hgti, 0x04f) +EMIT_RR (spe_hlgt, 0x2d8) +EMIT_RI10(spe_hlgti, 0x05f) +EMIT_RR (spe_ceqb, 0x3d0) +EMIT_RI10(spe_ceqbi, 0x07e) +EMIT_RR (spe_ceqh, 0x3c8) +EMIT_RI10(spe_ceqhi, 0x07d) +EMIT_RR (spe_ceq, 0x3c0) +EMIT_RI10(spe_ceqi, 0x07c) +EMIT_RR (spe_cgtb, 0x250) +EMIT_RI10(spe_cgtbi, 0x04e) +EMIT_RR (spe_cgth, 0x248) +EMIT_RI10(spe_cgthi, 0x04d) +EMIT_RR (spe_cgt, 0x240) +EMIT_RI10(spe_cgti, 0x04c) +EMIT_RR (spe_clgtb, 0x2d0) +EMIT_RI10(spe_clgtbi, 0x05e) +EMIT_RR (spe_clgth, 0x2c8) +EMIT_RI10(spe_clgthi, 0x05d) +EMIT_RR (spe_clgt, 0x2c0) +EMIT_RI10(spe_clgti, 0x05c) +EMIT_I16 (spe_br, 0x064) +EMIT_I16 (spe_bra, 0x060) +EMIT_RI16(spe_brsl, 0x066) +EMIT_RI16(spe_brasl, 0x062) +EMIT_RI16(spe_brnz, 0x042) +EMIT_RI16(spe_brz, 0x040) +EMIT_RI16(spe_brhnz, 0x046) +EMIT_RI16(spe_brhz, 0x044) extern void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); @@ -375,46 +375,46 @@ spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); /* Floating-point instructions */ -EMIT_RR (spe_fa, 0x2c4); -EMIT_RR (spe_dfa, 0x2cc); -EMIT_RR (spe_fs, 0x2c5); -EMIT_RR (spe_dfs, 0x2cd); -EMIT_RR (spe_fm, 0x2c6); -EMIT_RR (spe_dfm, 0x2ce); -EMIT_RRR (spe_fma, 0x00e); -EMIT_RR (spe_dfma, 0x35c); -EMIT_RRR (spe_fnms, 0x00d); -EMIT_RR (spe_dfnms, 0x35e); -EMIT_RRR (spe_fms, 0x00f); -EMIT_RR (spe_dfms, 0x35d); -EMIT_RR (spe_dfnma, 0x35f); -EMIT_R (spe_frest, 0x1b8); -EMIT_R (spe_frsqest, 0x1b9); -EMIT_RR (spe_fi, 0x3d4); -EMIT_RI8 (spe_csflt, 0x1da, 155); -EMIT_RI8 (spe_cflts, 0x1d8, 173); -EMIT_RI8 (spe_cuflt, 0x1db, 155); -EMIT_RI8 (spe_cfltu, 0x1d9, 173); -EMIT_R (spe_frds, 0x3b9); -EMIT_R (spe_fesd, 0x3b8); -EMIT_RR (spe_dfceq, 0x3c3); -EMIT_RR (spe_dfcmeq, 0x3cb); -EMIT_RR (spe_dfcgt, 0x2c3); -EMIT_RR (spe_dfcmgt, 0x2cb); -EMIT_RI7 (spe_dftsv, 0x3bf); -EMIT_RR (spe_fceq, 0x3c2); -EMIT_RR (spe_fcmeq, 0x3ca); -EMIT_RR (spe_fcgt, 0x2c2); -EMIT_RR (spe_fcmgt, 0x2ca); -EMIT_R (spe_fscrwr, 0x3ba); -EMIT_ (spe_fscrrd, 0x398); +EMIT_RR (spe_fa, 0x2c4) +EMIT_RR (spe_dfa, 0x2cc) +EMIT_RR (spe_fs, 0x2c5) +EMIT_RR (spe_dfs, 0x2cd) +EMIT_RR (spe_fm, 0x2c6) +EMIT_RR (spe_dfm, 0x2ce) +EMIT_RRR (spe_fma, 0x00e) +EMIT_RR (spe_dfma, 0x35c) +EMIT_RRR (spe_fnms, 0x00d) +EMIT_RR (spe_dfnms, 0x35e) +EMIT_RRR (spe_fms, 0x00f) +EMIT_RR (spe_dfms, 0x35d) +EMIT_RR (spe_dfnma, 0x35f) +EMIT_R (spe_frest, 0x1b8) +EMIT_R (spe_frsqest, 0x1b9) +EMIT_RR (spe_fi, 0x3d4) +EMIT_RI8 (spe_csflt, 0x1da, 155) +EMIT_RI8 (spe_cflts, 0x1d8, 173) +EMIT_RI8 (spe_cuflt, 0x1db, 155) +EMIT_RI8 (spe_cfltu, 0x1d9, 173) +EMIT_R (spe_frds, 0x3b9) +EMIT_R (spe_fesd, 0x3b8) +EMIT_RR (spe_dfceq, 0x3c3) +EMIT_RR (spe_dfcmeq, 0x3cb) +EMIT_RR (spe_dfcgt, 0x2c3) +EMIT_RR (spe_dfcmgt, 0x2cb) +EMIT_RI7 (spe_dftsv, 0x3bf) +EMIT_RR (spe_fceq, 0x3c2) +EMIT_RR (spe_fcmeq, 0x3ca) +EMIT_RR (spe_fcgt, 0x2c2) +EMIT_RR (spe_fcmgt, 0x2ca) +EMIT_R (spe_fscrwr, 0x3ba) +EMIT_ (spe_fscrrd, 0x398) /* Channel instructions */ -EMIT_R (spe_rdch, 0x00d); -EMIT_R (spe_rdchcnt, 0x00f); -EMIT_R (spe_wrch, 0x10d); +EMIT_R (spe_rdch, 0x00d) +EMIT_R (spe_rdchcnt, 0x00f) +EMIT_R (spe_wrch, 0x10d) #ifdef UNDEF_EMIT_MACROS diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 1da04ab7e0..4a217454dd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1037,11 +1037,28 @@ fetch_source( union tgsi_exec_channel index; uint swizzle; + /* We start with a direct index into a register file. + * + * file[1], + * where: + * file = SrcRegister.File + * [1] = SrcRegister.Index + */ index.i[0] = index.i[1] = index.i[2] = index.i[3] = reg->SrcRegister.Index; + /* There is an extra source register that indirectly subscripts + * a register file. The direct index now becomes an offset + * that is being added to the indirect register. + * + * file[ind[2].x+1], + * where: + * ind = SrcRegisterInd.File + * [2] = SrcRegisterInd.Index + * .x = SrcRegisterInd.SwizzleX + */ if (reg->SrcRegister.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1078,19 +1095,31 @@ fetch_source( } } - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. + * + * file[1][3] == file[1*sizeof(file[1])+3], + * where: + * [3] = SrcRegisterDim.Index + */ + if (reg->SrcRegister.Dimension) { + /* The size of the first-order array depends on the register file type. + * We need to multiply the index to the first array to get an effective, + * "flat" index that points to the beginning of the second-order array. + */ + switch (reg->SrcRegister.File) { case TGSI_FILE_INPUT: - index.i[0] *= 17; - index.i[1] *= 17; - index.i[2] *= 17; - index.i[3] *= 17; + index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; break; case TGSI_FILE_CONSTANT: - index.i[0] *= 4096; - index.i[1] *= 4096; - index.i[2] *= 4096; - index.i[3] *= 4096; + index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; break; default: assert( 0 ); @@ -1101,6 +1130,17 @@ fetch_source( index.i[2] += reg->SrcRegisterDim.Index; index.i[3] += reg->SrcRegisterDim.Index; + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[1][ind[4].y+3], + * where: + * ind = SrcRegisterDimInd.File + * [4] = SrcRegisterDimInd.Index + * .y = SrcRegisterDimInd.SwizzleX + */ if (reg->SrcRegisterDim.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1133,6 +1173,11 @@ fetch_source( index.i[i] = 0; } } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether SrcRegisterDim is followed + * by a dimension register and continue the saga. + */ } swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fc40a25e09..ac4b239910 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -178,6 +178,16 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_LOOP_NESTING 20 #define TGSI_EXEC_MAX_CALL_NESTING 20 +/* The maximum number of input attributes per vertex. For 2D + * input register files, this is the stride between two 1D + * arrays. + */ +#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17 + +/* The maximum number of constant vectors per constant buffer. + */ +#define TGSI_EXEC_MAX_CONST_BUFFER 4096 + /** * Run-time virtual machine state for executing TGSI shader. */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index f93db18114..8dfd2ced08 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -527,7 +527,7 @@ emit_func_call_dst( void (PIPE_CDECL *code)() ) { struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - unsigned i, n, xmm; + unsigned i, n; unsigned xmm_mask; /* Bitmask of the xmm registers to save */ @@ -563,7 +563,7 @@ emit_func_call_dst( sse_movups( func, x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ), - make_xmm( xmm ) ); + make_xmm( i ) ); ++n; } @@ -581,7 +581,7 @@ emit_func_call_dst( if(xmm_mask & (1 << i)) { sse_movups( func, - make_xmm( xmm ), + make_xmm( i ), x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) ); ++n; } diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index a1a51d7ef2..0d019808b0 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -136,8 +136,10 @@ void _debug_vprintf(const char *format, va_list ap) #elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) /* TODO */ #else /* !PIPE_SUBSYSTEM_WINDOWS */ +#ifdef DEBUG vfprintf(stderr, format, ap); #endif +#endif } diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c index 5b3cab4642..9c5f616ceb 100644 --- a/src/gallium/auxiliary/util/u_math.c +++ b/src/gallium/auxiliary/util/u_math.c @@ -30,7 +30,7 @@ #include "util/u_math.h" -/** 2^x, for x in [-1.0, 1.0[ */ +/** 2^x, for x in [-1.0, 1.0] */ float pow2_table[POW2_TABLE_SIZE]; @@ -43,7 +43,7 @@ init_pow2_table(void) } -/** log2(x), for x in [1.0, 2.0[ */ +/** log2(x), for x in [1.0, 2.0] */ float log2_table[LOG2_TABLE_SIZE]; diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index d2eaa2e7f7..fdaec8df82 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -161,6 +161,12 @@ static INLINE float logf( float f ) return (float) log( (double) f ); } +static INLINE double log2( double x ) +{ + const double invln2 = 1.442695041; + return log( x ) * invln2; +} + #else /* Work-around an extra semi-colon in VS 2005 logf definition */ #ifdef logf |