summary | refs | log | tree | commit | diff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt.c | 1
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 10
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 6
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 7
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 380
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.c | 65
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.h | 10
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 6
-rw-r--r-- src/gallium/auxiliary/util/p_debug.c | 2
-rw-r--r-- src/gallium/auxiliary/util/u_math.c | 4
-rw-r--r-- src/gallium/auxiliary/util/u_math.h | 6
-rw-r--r-- src/gallium/drivers/cell/common.h | 6
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 48
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 262
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_gen_fragment.h | 2
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 1
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_state_emit.c | 19
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_surface.c | 1
-rw-r--r-- src/gallium/drivers/cell/spu/spu_command.c | 6
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.c | 6
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.h | 10
-rw-r--r-- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 3
-rw-r--r-- src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 3
-rw-r--r-- src/gallium/drivers/cell/spu/spu_tri.c | 20
24 files changed, 458 insertions, 426 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 3c175f31d8..18f24e5980 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -35,6 +35,7 @@
#include "draw/draw_pt.h"
#include "draw/draw_vs.h"
#include "tgsi/tgsi_dump.h"
+#include "util/u_math.h"
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
index 6d11263be8..b65bfa7bbd 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
@@ -251,7 +251,7 @@ union vx_inst {
} inst;
};
-static inline void
+static INLINE void
emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB)
{
union vx_inst inst;
@@ -276,7 +276,7 @@ union vxr_inst {
} inst;
};
-static inline void
+static INLINE void
emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB)
{
union vxr_inst inst;
@@ -302,7 +302,7 @@ union va_inst {
} inst;
};
-static inline void
+static INLINE void
emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC)
{
union va_inst inst;
@@ -419,7 +419,7 @@ union d_inst {
} inst;
};
-static inline void
+static INLINE void
emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si)
{
union d_inst inst;
@@ -446,7 +446,7 @@ union a_inst {
} inst;
};
-static inline void
+static INLINE void
emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
uint rc)
{
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
index afb4704c39..08212a2a25 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
@@ -245,6 +245,9 @@ extern void
ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm);
extern void
+ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb);
extern void
@@ -310,6 +313,9 @@ ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset);
extern void
ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb);
+extern void
+ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset);
+
/**
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index f8568f690b..1bd9f1c8dd 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -958,9 +958,12 @@ spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsig
void
spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
{
+ /* Use a temporary, just in case rT == rA */
+ unsigned int tmp_reg = spe_allocate_available_register(p);
/* Duplicate bytes 0, 1, 2, and 3 across the whole register */
- spe_ila(p, rT, 0x00010203);
- spe_shufb(p, rT, rA, rA, rT);
+ spe_ila(p, tmp_reg, 0x00010203);
+ spe_shufb(p, rT, rA, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index d6a3c02f20..f1500cef29 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -100,199 +100,199 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s);
#endif /* RTASM_PPC_SPE_H */
#ifndef EMIT_
-#define EMIT_(name, _op) \
- extern void _name (struct spe_function *p, unsigned rT)
+#define EMIT_(_name, _op) \
+ extern void _name (struct spe_function *p, unsigned rT);
#define EMIT_R(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA)
+ extern void _name (struct spe_function *p, unsigned rT, unsigned rA);
#define EMIT_RR(_name, _op) \
extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- unsigned rB)
+ unsigned rB);
#define EMIT_RRR(_name, _op) \
extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- unsigned rB, unsigned rC)
+ unsigned rB, unsigned rC);
#define EMIT_RI7(_name, _op) \
extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ int imm);
#define EMIT_RI8(_name, _op, bias) \
extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ int imm);
#define EMIT_RI10(_name, _op) \
extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ int imm);
#define EMIT_RI10s(_name, _op) \
extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ int imm);
#define EMIT_RI16(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, int imm)
+ extern void _name (struct spe_function *p, unsigned rT, int imm);
#define EMIT_RI18(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, int imm)
+ extern void _name (struct spe_function *p, unsigned rT, int imm);
#define EMIT_I16(_name, _op) \
- extern void _name (struct spe_function *p, int imm)
+ extern void _name (struct spe_function *p, int imm);
#define UNDEF_EMIT_MACROS
#endif /* EMIT_ */
/* Memory load / store instructions
*/
-EMIT_RR (spe_lqx, 0x1c4);
-EMIT_RI16(spe_lqa, 0x061);
-EMIT_RI16(spe_lqr, 0x067);
-EMIT_RR (spe_stqx, 0x144);
-EMIT_RI16(spe_stqa, 0x041);
-EMIT_RI16(spe_stqr, 0x047);
-EMIT_RI7 (spe_cbd, 0x1f4);
-EMIT_RR (spe_cbx, 0x1d4);
-EMIT_RI7 (spe_chd, 0x1f5);
-EMIT_RI7 (spe_chx, 0x1d5);
-EMIT_RI7 (spe_cwd, 0x1f6);
-EMIT_RI7 (spe_cwx, 0x1d6);
-EMIT_RI7 (spe_cdd, 0x1f7);
-EMIT_RI7 (spe_cdx, 0x1d7);
+EMIT_RR (spe_lqx, 0x1c4)
+EMIT_RI16(spe_lqa, 0x061)
+EMIT_RI16(spe_lqr, 0x067)
+EMIT_RR (spe_stqx, 0x144)
+EMIT_RI16(spe_stqa, 0x041)
+EMIT_RI16(spe_stqr, 0x047)
+EMIT_RI7 (spe_cbd, 0x1f4)
+EMIT_RR (spe_cbx, 0x1d4)
+EMIT_RI7 (spe_chd, 0x1f5)
+EMIT_RI7 (spe_chx, 0x1d5)
+EMIT_RI7 (spe_cwd, 0x1f6)
+EMIT_RI7 (spe_cwx, 0x1d6)
+EMIT_RI7 (spe_cdd, 0x1f7)
+EMIT_RI7 (spe_cdx, 0x1d7)
/* Constant formation instructions
*/
-EMIT_RI16(spe_ilh, 0x083);
-EMIT_RI16(spe_ilhu, 0x082);
-EMIT_RI16(spe_il, 0x081);
-EMIT_RI18(spe_ila, 0x021);
-EMIT_RI16(spe_iohl, 0x0c1);
-EMIT_RI16(spe_fsmbi, 0x065);
+EMIT_RI16(spe_ilh, 0x083)
+EMIT_RI16(spe_ilhu, 0x082)
+EMIT_RI16(spe_il, 0x081)
+EMIT_RI18(spe_ila, 0x021)
+EMIT_RI16(spe_iohl, 0x0c1)
+EMIT_RI16(spe_fsmbi, 0x065)
/* Integer and logical instructions
*/
-EMIT_RR (spe_ah, 0x0c8);
-EMIT_RI10(spe_ahi, 0x01d);
-EMIT_RR (spe_a, 0x0c0);
-EMIT_RI10s(spe_ai, 0x01c);
-EMIT_RR (spe_sfh, 0x048);
-EMIT_RI10(spe_sfhi, 0x00d);
-EMIT_RR (spe_sf, 0x040);
-EMIT_RI10(spe_sfi, 0x00c);
-EMIT_RR (spe_addx, 0x340);
-EMIT_RR (spe_cg, 0x0c2);
-EMIT_RR (spe_cgx, 0x342);
-EMIT_RR (spe_sfx, 0x341);
-EMIT_RR (spe_bg, 0x042);
-EMIT_RR (spe_bgx, 0x343);
-EMIT_RR (spe_mpy, 0x3c4);
-EMIT_RR (spe_mpyu, 0x3cc);
-EMIT_RI10(spe_mpyi, 0x074);
-EMIT_RI10(spe_mpyui, 0x075);
-EMIT_RRR (spe_mpya, 0x00c);
-EMIT_RR (spe_mpyh, 0x3c5);
-EMIT_RR (spe_mpys, 0x3c7);
-EMIT_RR (spe_mpyhh, 0x3c6);
-EMIT_RR (spe_mpyhha, 0x346);
-EMIT_RR (spe_mpyhhu, 0x3ce);
-EMIT_RR (spe_mpyhhau, 0x34e);
-EMIT_R (spe_clz, 0x2a5);
-EMIT_R (spe_cntb, 0x2b4);
-EMIT_R (spe_fsmb, 0x1b6);
-EMIT_R (spe_fsmh, 0x1b5);
-EMIT_R (spe_fsm, 0x1b4);
-EMIT_R (spe_gbb, 0x1b2);
-EMIT_R (spe_gbh, 0x1b1);
-EMIT_R (spe_gb, 0x1b0);
-EMIT_RR (spe_avgb, 0x0d3);
-EMIT_RR (spe_absdb, 0x053);
-EMIT_RR (spe_sumb, 0x253);
-EMIT_R (spe_xsbh, 0x2b6);
-EMIT_R (spe_xshw, 0x2ae);
-EMIT_R (spe_xswd, 0x2a6);
-EMIT_RR (spe_and, 0x0c1);
-EMIT_RR (spe_andc, 0x2c1);
-EMIT_RI10s(spe_andbi, 0x016);
-EMIT_RI10s(spe_andhi, 0x015);
-EMIT_RI10s(spe_andi, 0x014);
-EMIT_RR (spe_or, 0x041);
-EMIT_RR (spe_orc, 0x2c9);
-EMIT_RI10s(spe_orbi, 0x006);
-EMIT_RI10s(spe_orhi, 0x005);
-EMIT_RI10s(spe_ori, 0x004);
-EMIT_R (spe_orx, 0x1f0);
-EMIT_RR (spe_xor, 0x241);
-EMIT_RI10s(spe_xorbi, 0x026);
-EMIT_RI10s(spe_xorhi, 0x025);
-EMIT_RI10s(spe_xori, 0x024);
-EMIT_RR (spe_nand, 0x0c9);
-EMIT_RR (spe_nor, 0x049);
-EMIT_RR (spe_eqv, 0x249);
-EMIT_RRR (spe_selb, 0x008);
-EMIT_RRR (spe_shufb, 0x00b);
+EMIT_RR (spe_ah, 0x0c8)
+EMIT_RI10(spe_ahi, 0x01d)
+EMIT_RR (spe_a, 0x0c0)
+EMIT_RI10s(spe_ai, 0x01c)
+EMIT_RR (spe_sfh, 0x048)
+EMIT_RI10(spe_sfhi, 0x00d)
+EMIT_RR (spe_sf, 0x040)
+EMIT_RI10(spe_sfi, 0x00c)
+EMIT_RR (spe_addx, 0x340)
+EMIT_RR (spe_cg, 0x0c2)
+EMIT_RR (spe_cgx, 0x342)
+EMIT_RR (spe_sfx, 0x341)
+EMIT_RR (spe_bg, 0x042)
+EMIT_RR (spe_bgx, 0x343)
+EMIT_RR (spe_mpy, 0x3c4)
+EMIT_RR (spe_mpyu, 0x3cc)
+EMIT_RI10(spe_mpyi, 0x074)
+EMIT_RI10(spe_mpyui, 0x075)
+EMIT_RRR (spe_mpya, 0x00c)
+EMIT_RR (spe_mpyh, 0x3c5)
+EMIT_RR (spe_mpys, 0x3c7)
+EMIT_RR (spe_mpyhh, 0x3c6)
+EMIT_RR (spe_mpyhha, 0x346)
+EMIT_RR (spe_mpyhhu, 0x3ce)
+EMIT_RR (spe_mpyhhau, 0x34e)
+EMIT_R (spe_clz, 0x2a5)
+EMIT_R (spe_cntb, 0x2b4)
+EMIT_R (spe_fsmb, 0x1b6)
+EMIT_R (spe_fsmh, 0x1b5)
+EMIT_R (spe_fsm, 0x1b4)
+EMIT_R (spe_gbb, 0x1b2)
+EMIT_R (spe_gbh, 0x1b1)
+EMIT_R (spe_gb, 0x1b0)
+EMIT_RR (spe_avgb, 0x0d3)
+EMIT_RR (spe_absdb, 0x053)
+EMIT_RR (spe_sumb, 0x253)
+EMIT_R (spe_xsbh, 0x2b6)
+EMIT_R (spe_xshw, 0x2ae)
+EMIT_R (spe_xswd, 0x2a6)
+EMIT_RR (spe_and, 0x0c1)
+EMIT_RR (spe_andc, 0x2c1)
+EMIT_RI10s(spe_andbi, 0x016)
+EMIT_RI10s(spe_andhi, 0x015)
+EMIT_RI10s(spe_andi, 0x014)
+EMIT_RR (spe_or, 0x041)
+EMIT_RR (spe_orc, 0x2c9)
+EMIT_RI10s(spe_orbi, 0x006)
+EMIT_RI10s(spe_orhi, 0x005)
+EMIT_RI10s(spe_ori, 0x004)
+EMIT_R (spe_orx, 0x1f0)
+EMIT_RR (spe_xor, 0x241)
+EMIT_RI10s(spe_xorbi, 0x026)
+EMIT_RI10s(spe_xorhi, 0x025)
+EMIT_RI10s(spe_xori, 0x024)
+EMIT_RR (spe_nand, 0x0c9)
+EMIT_RR (spe_nor, 0x049)
+EMIT_RR (spe_eqv, 0x249)
+EMIT_RRR (spe_selb, 0x008)
+EMIT_RRR (spe_shufb, 0x00b)
/* Shift and rotate instructions
*/
-EMIT_RR (spe_shlh, 0x05f);
-EMIT_RI7 (spe_shlhi, 0x07f);
-EMIT_RR (spe_shl, 0x05b);
-EMIT_RI7 (spe_shli, 0x07b);
-EMIT_RR (spe_shlqbi, 0x1db);
-EMIT_RI7 (spe_shlqbii, 0x1fb);
-EMIT_RR (spe_shlqby, 0x1df);
-EMIT_RI7 (spe_shlqbyi, 0x1ff);
-EMIT_RR (spe_shlqbybi, 0x1cf);
-EMIT_RR (spe_roth, 0x05c);
-EMIT_RI7 (spe_rothi, 0x07c);
-EMIT_RR (spe_rot, 0x058);
-EMIT_RI7 (spe_roti, 0x078);
-EMIT_RR (spe_rotqby, 0x1dc);
-EMIT_RI7 (spe_rotqbyi, 0x1fc);
-EMIT_RR (spe_rotqbybi, 0x1cc);
-EMIT_RR (spe_rotqbi, 0x1d8);
-EMIT_RI7 (spe_rotqbii, 0x1f8);
-EMIT_RR (spe_rothm, 0x05d);
-EMIT_RI7 (spe_rothmi, 0x07d);
-EMIT_RR (spe_rotm, 0x059);
-EMIT_RI7 (spe_rotmi, 0x079);
-EMIT_RR (spe_rotqmby, 0x1dd);
-EMIT_RI7 (spe_rotqmbyi, 0x1fd);
-EMIT_RR (spe_rotqmbybi, 0x1cd);
-EMIT_RR (spe_rotqmbi, 0x1c9);
-EMIT_RI7 (spe_rotqmbii, 0x1f9);
-EMIT_RR (spe_rotmah, 0x05e);
-EMIT_RI7 (spe_rotmahi, 0x07e);
-EMIT_RR (spe_rotma, 0x05a);
-EMIT_RI7 (spe_rotmai, 0x07a);
+EMIT_RR (spe_shlh, 0x05f)
+EMIT_RI7 (spe_shlhi, 0x07f)
+EMIT_RR (spe_shl, 0x05b)
+EMIT_RI7 (spe_shli, 0x07b)
+EMIT_RR (spe_shlqbi, 0x1db)
+EMIT_RI7 (spe_shlqbii, 0x1fb)
+EMIT_RR (spe_shlqby, 0x1df)
+EMIT_RI7 (spe_shlqbyi, 0x1ff)
+EMIT_RR (spe_shlqbybi, 0x1cf)
+EMIT_RR (spe_roth, 0x05c)
+EMIT_RI7 (spe_rothi, 0x07c)
+EMIT_RR (spe_rot, 0x058)
+EMIT_RI7 (spe_roti, 0x078)
+EMIT_RR (spe_rotqby, 0x1dc)
+EMIT_RI7 (spe_rotqbyi, 0x1fc)
+EMIT_RR (spe_rotqbybi, 0x1cc)
+EMIT_RR (spe_rotqbi, 0x1d8)
+EMIT_RI7 (spe_rotqbii, 0x1f8)
+EMIT_RR (spe_rothm, 0x05d)
+EMIT_RI7 (spe_rothmi, 0x07d)
+EMIT_RR (spe_rotm, 0x059)
+EMIT_RI7 (spe_rotmi, 0x079)
+EMIT_RR (spe_rotqmby, 0x1dd)
+EMIT_RI7 (spe_rotqmbyi, 0x1fd)
+EMIT_RR (spe_rotqmbybi, 0x1cd)
+EMIT_RR (spe_rotqmbi, 0x1c9)
+EMIT_RI7 (spe_rotqmbii, 0x1f9)
+EMIT_RR (spe_rotmah, 0x05e)
+EMIT_RI7 (spe_rotmahi, 0x07e)
+EMIT_RR (spe_rotma, 0x05a)
+EMIT_RI7 (spe_rotmai, 0x07a)
/* Compare, branch, and halt instructions
*/
-EMIT_RR (spe_heq, 0x3d8);
-EMIT_RI10(spe_heqi, 0x07f);
-EMIT_RR (spe_hgt, 0x258);
-EMIT_RI10(spe_hgti, 0x04f);
-EMIT_RR (spe_hlgt, 0x2d8);
-EMIT_RI10(spe_hlgti, 0x05f);
-EMIT_RR (spe_ceqb, 0x3d0);
-EMIT_RI10(spe_ceqbi, 0x07e);
-EMIT_RR (spe_ceqh, 0x3c8);
-EMIT_RI10(spe_ceqhi, 0x07d);
-EMIT_RR (spe_ceq, 0x3c0);
-EMIT_RI10(spe_ceqi, 0x07c);
-EMIT_RR (spe_cgtb, 0x250);
-EMIT_RI10(spe_cgtbi, 0x04e);
-EMIT_RR (spe_cgth, 0x248);
-EMIT_RI10(spe_cgthi, 0x04d);
-EMIT_RR (spe_cgt, 0x240);
-EMIT_RI10(spe_cgti, 0x04c);
-EMIT_RR (spe_clgtb, 0x2d0);
-EMIT_RI10(spe_clgtbi, 0x05e);
-EMIT_RR (spe_clgth, 0x2c8);
-EMIT_RI10(spe_clgthi, 0x05d);
-EMIT_RR (spe_clgt, 0x2c0);
-EMIT_RI10(spe_clgti, 0x05c);
-EMIT_I16 (spe_br, 0x064);
-EMIT_I16 (spe_bra, 0x060);
-EMIT_RI16(spe_brsl, 0x066);
-EMIT_RI16(spe_brasl, 0x062);
-EMIT_RI16(spe_brnz, 0x042);
-EMIT_RI16(spe_brz, 0x040);
-EMIT_RI16(spe_brhnz, 0x046);
-EMIT_RI16(spe_brhz, 0x044);
+EMIT_RR (spe_heq, 0x3d8)
+EMIT_RI10(spe_heqi, 0x07f)
+EMIT_RR (spe_hgt, 0x258)
+EMIT_RI10(spe_hgti, 0x04f)
+EMIT_RR (spe_hlgt, 0x2d8)
+EMIT_RI10(spe_hlgti, 0x05f)
+EMIT_RR (spe_ceqb, 0x3d0)
+EMIT_RI10(spe_ceqbi, 0x07e)
+EMIT_RR (spe_ceqh, 0x3c8)
+EMIT_RI10(spe_ceqhi, 0x07d)
+EMIT_RR (spe_ceq, 0x3c0)
+EMIT_RI10(spe_ceqi, 0x07c)
+EMIT_RR (spe_cgtb, 0x250)
+EMIT_RI10(spe_cgtbi, 0x04e)
+EMIT_RR (spe_cgth, 0x248)
+EMIT_RI10(spe_cgthi, 0x04d)
+EMIT_RR (spe_cgt, 0x240)
+EMIT_RI10(spe_cgti, 0x04c)
+EMIT_RR (spe_clgtb, 0x2d0)
+EMIT_RI10(spe_clgtbi, 0x05e)
+EMIT_RR (spe_clgth, 0x2c8)
+EMIT_RI10(spe_clgthi, 0x05d)
+EMIT_RR (spe_clgt, 0x2c0)
+EMIT_RI10(spe_clgti, 0x05c)
+EMIT_I16 (spe_br, 0x064)
+EMIT_I16 (spe_bra, 0x060)
+EMIT_RI16(spe_brsl, 0x066)
+EMIT_RI16(spe_brasl, 0x062)
+EMIT_RI16(spe_brnz, 0x042)
+EMIT_RI16(spe_brz, 0x040)
+EMIT_RI16(spe_brhnz, 0x046)
+EMIT_RI16(spe_brhz, 0x044)
extern void
spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset);
@@ -375,46 +375,46 @@ spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB);
/* Floating-point instructions
*/
-EMIT_RR (spe_fa, 0x2c4);
-EMIT_RR (spe_dfa, 0x2cc);
-EMIT_RR (spe_fs, 0x2c5);
-EMIT_RR (spe_dfs, 0x2cd);
-EMIT_RR (spe_fm, 0x2c6);
-EMIT_RR (spe_dfm, 0x2ce);
-EMIT_RRR (spe_fma, 0x00e);
-EMIT_RR (spe_dfma, 0x35c);
-EMIT_RRR (spe_fnms, 0x00d);
-EMIT_RR (spe_dfnms, 0x35e);
-EMIT_RRR (spe_fms, 0x00f);
-EMIT_RR (spe_dfms, 0x35d);
-EMIT_RR (spe_dfnma, 0x35f);
-EMIT_R (spe_frest, 0x1b8);
-EMIT_R (spe_frsqest, 0x1b9);
-EMIT_RR (spe_fi, 0x3d4);
-EMIT_RI8 (spe_csflt, 0x1da, 155);
-EMIT_RI8 (spe_cflts, 0x1d8, 173);
-EMIT_RI8 (spe_cuflt, 0x1db, 155);
-EMIT_RI8 (spe_cfltu, 0x1d9, 173);
-EMIT_R (spe_frds, 0x3b9);
-EMIT_R (spe_fesd, 0x3b8);
-EMIT_RR (spe_dfceq, 0x3c3);
-EMIT_RR (spe_dfcmeq, 0x3cb);
-EMIT_RR (spe_dfcgt, 0x2c3);
-EMIT_RR (spe_dfcmgt, 0x2cb);
-EMIT_RI7 (spe_dftsv, 0x3bf);
-EMIT_RR (spe_fceq, 0x3c2);
-EMIT_RR (spe_fcmeq, 0x3ca);
-EMIT_RR (spe_fcgt, 0x2c2);
-EMIT_RR (spe_fcmgt, 0x2ca);
-EMIT_R (spe_fscrwr, 0x3ba);
-EMIT_ (spe_fscrrd, 0x398);
+EMIT_RR (spe_fa, 0x2c4)
+EMIT_RR (spe_dfa, 0x2cc)
+EMIT_RR (spe_fs, 0x2c5)
+EMIT_RR (spe_dfs, 0x2cd)
+EMIT_RR (spe_fm, 0x2c6)
+EMIT_RR (spe_dfm, 0x2ce)
+EMIT_RRR (spe_fma, 0x00e)
+EMIT_RR (spe_dfma, 0x35c)
+EMIT_RRR (spe_fnms, 0x00d)
+EMIT_RR (spe_dfnms, 0x35e)
+EMIT_RRR (spe_fms, 0x00f)
+EMIT_RR (spe_dfms, 0x35d)
+EMIT_RR (spe_dfnma, 0x35f)
+EMIT_R (spe_frest, 0x1b8)
+EMIT_R (spe_frsqest, 0x1b9)
+EMIT_RR (spe_fi, 0x3d4)
+EMIT_RI8 (spe_csflt, 0x1da, 155)
+EMIT_RI8 (spe_cflts, 0x1d8, 173)
+EMIT_RI8 (spe_cuflt, 0x1db, 155)
+EMIT_RI8 (spe_cfltu, 0x1d9, 173)
+EMIT_R (spe_frds, 0x3b9)
+EMIT_R (spe_fesd, 0x3b8)
+EMIT_RR (spe_dfceq, 0x3c3)
+EMIT_RR (spe_dfcmeq, 0x3cb)
+EMIT_RR (spe_dfcgt, 0x2c3)
+EMIT_RR (spe_dfcmgt, 0x2cb)
+EMIT_RI7 (spe_dftsv, 0x3bf)
+EMIT_RR (spe_fceq, 0x3c2)
+EMIT_RR (spe_fcmeq, 0x3ca)
+EMIT_RR (spe_fcgt, 0x2c2)
+EMIT_RR (spe_fcmgt, 0x2ca)
+EMIT_R (spe_fscrwr, 0x3ba)
+EMIT_ (spe_fscrrd, 0x398)
/* Channel instructions
*/
-EMIT_R (spe_rdch, 0x00d);
-EMIT_R (spe_rdchcnt, 0x00f);
-EMIT_R (spe_wrch, 0x10d);
+EMIT_R (spe_rdch, 0x00d)
+EMIT_R (spe_rdchcnt, 0x00f)
+EMIT_R (spe_wrch, 0x10d)
#ifdef UNDEF_EMIT_MACROS
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 1da04ab7e0..4a217454dd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1037,11 +1037,28 @@ fetch_source(
union tgsi_exec_channel index;
uint swizzle;
+ /* We start with a direct index into a register file.
+ *
+ * file[1],
+ * where:
+ * file = SrcRegister.File
+ * [1] = SrcRegister.Index
+ */
index.i[0] =
index.i[1] =
index.i[2] =
index.i[3] = reg->SrcRegister.Index;
+ /* There is an extra source register that indirectly subscripts
+ * a register file. The direct index now becomes an offset
+ * that is being added to the indirect register.
+ *
+ * file[ind[2].x+1],
+ * where:
+ * ind = SrcRegisterInd.File
+ * [2] = SrcRegisterInd.Index
+ * .x = SrcRegisterInd.SwizzleX
+ */
if (reg->SrcRegister.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1078,19 +1095,31 @@ fetch_source(
}
}
- if( reg->SrcRegister.Dimension ) {
- switch( reg->SrcRegister.File ) {
+ /* There is an extra source register that is a second
+ * subscript to a register file. Effectively it means that
+ * the register file is actually a 2D array of registers.
+ *
+ * file[1][3] == file[1*sizeof(file[1])+3],
+ * where:
+ * [3] = SrcRegisterDim.Index
+ */
+ if (reg->SrcRegister.Dimension) {
+ /* The size of the first-order array depends on the register file type.
+ * We need to multiply the index to the first array to get an effective,
+ * "flat" index that points to the beginning of the second-order array.
+ */
+ switch (reg->SrcRegister.File) {
case TGSI_FILE_INPUT:
- index.i[0] *= 17;
- index.i[1] *= 17;
- index.i[2] *= 17;
- index.i[3] *= 17;
+ index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
break;
case TGSI_FILE_CONSTANT:
- index.i[0] *= 4096;
- index.i[1] *= 4096;
- index.i[2] *= 4096;
- index.i[3] *= 4096;
+ index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
break;
default:
assert( 0 );
@@ -1101,6 +1130,17 @@ fetch_source(
index.i[2] += reg->SrcRegisterDim.Index;
index.i[3] += reg->SrcRegisterDim.Index;
+ /* Again, the second subscript index can be addressed indirectly
+ * identically to the first one.
+ * Nothing stops us from indirectly addressing the indirect register,
+ * but there is no need for that, so we won't exercise it.
+ *
+ * file[1][ind[4].y+3],
+ * where:
+ * ind = SrcRegisterDimInd.File
+ * [4] = SrcRegisterDimInd.Index
+ * .y = SrcRegisterDimInd.SwizzleX
+ */
if (reg->SrcRegisterDim.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1133,6 +1173,11 @@ fetch_source(
index.i[i] = 0;
}
}
+
+ /* If by any chance there was a need for a 3D array of register
+ * files, we would have to check whether SrcRegisterDim is followed
+ * by a dimension register and continue the saga.
+ */
}
swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index fc40a25e09..ac4b239910 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -178,6 +178,16 @@ struct tgsi_exec_labels
#define TGSI_EXEC_MAX_LOOP_NESTING 20
#define TGSI_EXEC_MAX_CALL_NESTING 20
+/* The maximum number of input attributes per vertex. For 2D
+ * input register files, this is the stride between two 1D
+ * arrays.
+ */
+#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17
+
+/* The maximum number of constant vectors per constant buffer.
+ */
+#define TGSI_EXEC_MAX_CONST_BUFFER 4096
+
/**
* Run-time virtual machine state for executing TGSI shader.
*/
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index f93db18114..8dfd2ced08 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -527,7 +527,7 @@ emit_func_call_dst(
void (PIPE_CDECL *code)() )
{
struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
- unsigned i, n, xmm;
+ unsigned i, n;
unsigned xmm_mask;
/* Bitmask of the xmm registers to save */
@@ -563,7 +563,7 @@ emit_func_call_dst(
sse_movups(
func,
x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
- make_xmm( xmm ) );
+ make_xmm( i ) );
++n;
}
@@ -581,7 +581,7 @@ emit_func_call_dst(
if(xmm_mask & (1 << i)) {
sse_movups(
func,
- make_xmm( xmm ),
+ make_xmm( i ),
x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) );
++n;
}
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c
index a1a51d7ef2..0d019808b0 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/p_debug.c
@@ -136,8 +136,10 @@ void _debug_vprintf(const char *format, va_list ap)
#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
/* TODO */
#else /* !PIPE_SUBSYSTEM_WINDOWS */
+#ifdef DEBUG
vfprintf(stderr, format, ap);
#endif
+#endif
}
diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c
index 5b3cab4642..9c5f616ceb 100644
--- a/src/gallium/auxiliary/util/u_math.c
+++ b/src/gallium/auxiliary/util/u_math.c
@@ -30,7 +30,7 @@
#include "util/u_math.h"
-/** 2^x, for x in [-1.0, 1.0[ */
+/** 2^x, for x in [-1.0, 1.0] */
float pow2_table[POW2_TABLE_SIZE];
@@ -43,7 +43,7 @@ init_pow2_table(void)
}
-/** log2(x), for x in [1.0, 2.0[ */
+/** log2(x), for x in [1.0, 2.0] */
float log2_table[LOG2_TABLE_SIZE];
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index d2eaa2e7f7..fdaec8df82 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -161,6 +161,12 @@ static INLINE float logf( float f )
return (float) log( (double) f );
}
+static INLINE double log2( double x )
+{
+ const double invln2 = 1.442695041;
+ return log( x ) * invln2;
+}
+
#else
/* Work-around an extra semi-colon in VS 2005 logf definition */
#ifdef logf
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index 87488ea2d7..a670ed3c6e 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -130,6 +130,9 @@
#define CELL_FENCE_EMITTED 1
#define CELL_FENCE_SIGNALLED 2
+#define CELL_FACING_FRONT 0
+#define CELL_FACING_BACK 1
+
struct cell_fence
{
/** There's a 16-byte status qword per SPU */
@@ -160,7 +163,8 @@ struct cell_command_fragment_ops
struct pipe_depth_stencil_alpha_state dsa;
struct pipe_blend_state blend;
struct pipe_blend_color blend_color;
- unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS];
+ unsigned code_front[SPU_MAX_FRAGMENT_OPS_INSTS];
+ unsigned code_back[SPU_MAX_FRAGMENT_OPS_INSTS];
};
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 5c41b264ac..96a1743fc1 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -880,6 +880,52 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
/**
+ * Emit 3-component vector normalize.
+ */
+static boolean
+emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int src_reg[3];
+ int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
+
+ spe_comment(gen->f, -4, "NRM3:");
+
+ src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+
+ /* t0 = x * x */
+ spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
+
+ /* t1 = y * y */
+ spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
+
+ /* t0 = z * z + t0 */
+ spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
+
+ /* t0 = t0 + t1 */
+ spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
+
+ /* t1 = 1.0 / sqrt(t0) */
+ spe_frsqest(gen->f, t1_reg, t0_reg);
+ spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
+
+ for (ch = 0; ch < 3; ch++) { /* NOTE: omit W channel */
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ /* dst = src[ch] * t1 */
+ spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+
+ free_itemps(gen);
+ return true;
+}
+
+
+/**
* Emit cross product. See emit_ADD for comments.
*/
static boolean
@@ -1769,6 +1815,8 @@ emit_instruction(struct codegen *gen,
return emit_DP4(gen, inst);
case TGSI_OPCODE_DPH:
return emit_DPH(gen, inst);
+ case TGSI_OPCODE_NRM:
+ return emit_NRM3(gen, inst);
case TGSI_OPCODE_XPD:
return emit_XPD(gen, inst);
case TGSI_OPCODE_RCP:
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index d9c3ff3f4d..82336d6635 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -1190,14 +1190,14 @@ gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state,
}
break;
- case PIPE_FUNC_GREATER:
+ case PIPE_FUNC_LESS:
if (state->value_mask == stencil_max_value) {
- /* stencil_pass = fragment_mask & (s > reference) */
+ /* stencil_pass = fragment_mask & (reference < s) */
spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
}
else {
- /* stencil_pass = fragment_mask & ((s&mask) > (reference&mask)) */
+ /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */
unsigned int tmp_masked_stencil = spe_allocate_available_register(f);
spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, state->value_mask & state->ref_value);
@@ -1206,7 +1206,7 @@ gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state,
}
break;
- case PIPE_FUNC_LESS:
+ case PIPE_FUNC_GREATER:
if (state->value_mask == stencil_max_value) {
/* stencil_pass = fragment_mask & (reference > s) */
/* There's no convenient Compare Less Than Immediate instruction, so
@@ -1233,9 +1233,9 @@ gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state,
}
break;
- case PIPE_FUNC_LEQUAL:
+ case PIPE_FUNC_GEQUAL:
if (state->value_mask == stencil_max_value) {
- /* stencil_pass = fragment_mask & (s <= reference)
+ /* stencil_pass = fragment_mask & (reference >= s)
* = fragment_mask & ~(s > reference) */
spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
@@ -1250,9 +1250,9 @@ gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state,
}
break;
- case PIPE_FUNC_GEQUAL:
+ case PIPE_FUNC_LEQUAL:
if (state->value_mask == stencil_max_value) {
- /* stencil_pass = fragment_mask & (s >= reference) ]
+ /* stencil_pass = fragment_mask & (reference <= s)
* = fragment_mask & ~(reference > s) */
/* As above, we have to do this by loading a register */
unsigned int tmp_reg = spe_allocate_available_register(f);
@@ -1412,144 +1412,72 @@ gen_stencil_values(struct spe_function *f, unsigned int stencil_op,
* and released by the corresponding spe_release_register_set() call.
*/
static void
-gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa,
+gen_get_stencil_values(struct spe_function *f, const struct pipe_stencil_state *stencil,
+ const unsigned int depth_enabled,
unsigned int fbS_reg,
unsigned int *fail_reg, unsigned int *zfail_reg,
- unsigned int *zpass_reg, unsigned int *back_fail_reg,
- unsigned int *back_zfail_reg, unsigned int *back_zpass_reg)
+ unsigned int *zpass_reg)
{
- unsigned zfail_op, back_zfail_op;
+ unsigned zfail_op;
/* Stenciling had better be enabled here */
- ASSERT(dsa->stencil[0].enabled);
+ ASSERT(stencil->enabled);
/* If the depth test is not enabled, it is treated as though it always
- * passes. In particular, that means that the "zfail_op" (and the backfacing
- * counterpart, if active) are not considered - a failing stencil test will
- * trigger the "fail_op", and a passing stencil test will trigger the
- * "zpass_op".
+ * passes, which means that the zfail_op is not considered - a
+ * failing stencil test triggers the fail_op, and a passing one
+ * triggers the zpass_op.
*
- * By overriding the operations in this case to be PIPE_STENCIL_OP_KEEP,
- * we keep them from being calculated.
+ * As an optimization, override calculation of the zfail_op values
+ * if they aren't going to be used. By setting the value of
+ * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed
+ * to match the incoming stencil values, and no calculation will
+ * be done.
*/
- if (dsa->depth.enabled) {
- zfail_op = dsa->stencil[0].zfail_op;
- back_zfail_op = dsa->stencil[1].zfail_op;
+ if (depth_enabled) {
+ zfail_op = stencil->zfail_op;
}
else {
zfail_op = PIPE_STENCIL_OP_KEEP;
- back_zfail_op = PIPE_STENCIL_OP_KEEP;
}
/* One-sided or front-facing stencil */
- if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP) {
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) {
*fail_reg = fbS_reg;
}
else {
*fail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[0].fail_op, dsa->stencil[0].ref_value,
+ gen_stencil_values(f, stencil->fail_op, stencil->ref_value,
0xff, fbS_reg, *fail_reg);
}
+ /* Check the possibly overridden value, not the structure value */
if (zfail_op == PIPE_STENCIL_OP_KEEP) {
*zfail_reg = fbS_reg;
}
- else if (zfail_op == dsa->stencil[0].fail_op) {
+ else if (zfail_op == stencil->fail_op) {
*zfail_reg = *fail_reg;
}
else {
*zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[0].zfail_op, dsa->stencil[0].ref_value,
+ gen_stencil_values(f, stencil->zfail_op, stencil->ref_value,
0xff, fbS_reg, *zfail_reg);
}
- if (dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP) {
+ if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
*zpass_reg = fbS_reg;
}
- else if (dsa->stencil[0].zpass_op == dsa->stencil[0].fail_op) {
+ else if (stencil->zpass_op == stencil->fail_op) {
*zpass_reg = *fail_reg;
}
- else if (dsa->stencil[0].zpass_op == zfail_op) {
+ else if (stencil->zpass_op == zfail_op) {
*zpass_reg = *zfail_reg;
}
else {
*zpass_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[0].zpass_op, dsa->stencil[0].ref_value,
+ gen_stencil_values(f, stencil->zpass_op, stencil->ref_value,
0xff, fbS_reg, *zpass_reg);
}
-
- /* If two-sided stencil is enabled, we have more work to do. */
- if (!dsa->stencil[1].enabled) {
- /* This just flags that the registers need not be deallocated later */
- *back_fail_reg = fbS_reg;
- *back_zfail_reg = fbS_reg;
- *back_zpass_reg = fbS_reg;
- }
- else {
- /* Same calculations as above, but for the back stencil */
- if (dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP) {
- *back_fail_reg = fbS_reg;
- }
- else if (dsa->stencil[1].fail_op == dsa->stencil[0].fail_op) {
- *back_fail_reg = *fail_reg;
- }
- else if (dsa->stencil[1].fail_op == zfail_op) {
- *back_fail_reg = *zfail_reg;
- }
- else if (dsa->stencil[1].fail_op == dsa->stencil[0].zpass_op) {
- *back_fail_reg = *zpass_reg;
- }
- else {
- *back_fail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[1].fail_op, dsa->stencil[1].ref_value,
- 0xff, fbS_reg, *back_fail_reg);
- }
-
- if (back_zfail_op == PIPE_STENCIL_OP_KEEP) {
- *back_zfail_reg = fbS_reg;
- }
- else if (back_zfail_op == dsa->stencil[0].fail_op) {
- *back_zfail_reg = *fail_reg;
- }
- else if (back_zfail_op == zfail_op) {
- *back_zfail_reg = *zfail_reg;
- }
- else if (back_zfail_op == dsa->stencil[0].zpass_op) {
- *back_zfail_reg = *zpass_reg;
- }
- else if (back_zfail_op == dsa->stencil[1].fail_op) {
- *back_zfail_reg = *back_fail_reg;
- }
- else {
- *back_zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[1].zfail_op, dsa->stencil[1].ref_value,
- 0xff, fbS_reg, *back_zfail_reg);
- }
-
- if (dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) {
- *back_zpass_reg = fbS_reg;
- }
- else if (dsa->stencil[1].zpass_op == dsa->stencil[0].fail_op) {
- *back_zpass_reg = *fail_reg;
- }
- else if (dsa->stencil[1].zpass_op == zfail_op) {
- *back_zpass_reg = *zfail_reg;
- }
- else if (dsa->stencil[1].zpass_op == dsa->stencil[0].zpass_op) {
- *back_zpass_reg = *zpass_reg;
- }
- else if (dsa->stencil[1].zpass_op == dsa->stencil[1].fail_op) {
- *back_zpass_reg = *back_fail_reg;
- }
- else if (dsa->stencil[1].zpass_op == back_zfail_op) {
- *back_zpass_reg = *back_zfail_reg;
- }
- else {
- *back_zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[1].zpass_op, dsa->stencil[1].ref_value,
- 0xff, fbS_reg, *back_zpass_reg);
- }
- } /* End of calculations for back-facing stencil */
}
/* Note that fbZ_reg may *not* be set on entry, if in fact
@@ -1559,7 +1487,7 @@ gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_a
static boolean
gen_stencil_depth_test(struct spe_function *f,
const struct pipe_depth_stencil_alpha_state *dsa,
- const int const facing_reg,
+ const uint facing,
const int mask_reg, const int fragZ_reg,
const int fbZ_reg, const int fbS_reg)
{
@@ -1571,6 +1499,8 @@ gen_stencil_depth_test(struct spe_function *f,
boolean need_to_calculate_stencil_values;
boolean need_to_writemask_stencil_values;
+ struct pipe_stencil_state *stencil;
+
/* Registers. We may or may not actually allocate these, depending
* on whether the state values indicate that we need them.
*/
@@ -1598,6 +1528,20 @@ gen_stencil_depth_test(struct spe_function *f,
spe_comment(f, 0, "Allocating stencil register set");
spe_allocate_register_set(f);
+ /* The facing we're given is the fragment facing; it doesn't
+ * exactly match the stencil facing. If stencil is enabled,
+ * but two-sided stencil is *not* enabled, we use the same
+ * stencil settings for both front- and back-facing fragments.
+ * We only use the "back-facing" stencil for backfacing fragments
+ * if two-sided stenciling is enabled.
+ */
+ if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) {
+ stencil = &dsa->stencil[1];
+ }
+ else {
+ stencil = &dsa->stencil[0];
+ }
+
/* Calculate the writemask. If the writemask is trivial (either
* all 0s, meaning that we don't need to calculate any stencil values
* because they're not going to change the stencil anyway, or all 1s,
@@ -1608,24 +1552,20 @@ gen_stencil_depth_test(struct spe_function *f,
* Note that if the backface stencil is *not* enabled, the backface
* stencil will have the same values as the frontface stencil.
*/
- if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) {
- /* No changes to any stencil values */
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zfail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
need_to_calculate_stencil_values = false;
need_to_writemask_stencil_values = false;
}
- else if (dsa->stencil[0].write_mask == 0x0 && dsa->stencil[1].write_mask == 0x0) {
+ else if (stencil->write_mask == 0x0) {
/* All changes are writemasked out, so no need to calculate
* what those changes might be, and no need to write anything back.
*/
need_to_calculate_stencil_values = false;
need_to_writemask_stencil_values = false;
}
- else if (dsa->stencil[0].write_mask == 0xff && dsa->stencil[1].write_mask == 0xff) {
+ else if (stencil->write_mask == 0xff) {
/* Still trivial, but a little less so. We need to write the stencil
* values, but we don't need to mask them.
*/
@@ -1645,14 +1585,7 @@ gen_stencil_depth_test(struct spe_function *f,
*/
spe_comment(f, 0, "Computing stencil writemask");
stencil_writemask_reg = spe_allocate_available_register(f);
- spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask);
- if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) {
- unsigned int back_write_mask_reg = spe_allocate_available_register(f);
- spe_comment(f, 0, "Resolving two-sided stencil writemask");
- spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask);
- spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg);
- spe_release_register(f, back_write_mask_reg);
- }
+ spe_load_uint(f, stencil_writemask_reg, stencil->write_mask); /* use the selected stencil state, matching the write_mask checks above */
}
/* At least one-sided stenciling must be on. Generate code that
@@ -1666,19 +1599,7 @@ gen_stencil_depth_test(struct spe_function *f,
*/
spe_comment(f, 0, "Running basic stencil test");
stencil_pass_reg = spe_allocate_available_register(f);
- gen_stencil_test(f, &dsa->stencil[0], 0xff, mask_reg, fbS_reg, stencil_pass_reg);
-
- /* If two-sided stenciling is on, generate code to run the stencil
- * test on the backfacing stencil as well, and combine the two results
- * into the one correct result based on facing.
- */
- if (dsa->stencil[1].enabled) {
- unsigned int temp_reg = spe_allocate_available_register(f);
- spe_comment(f, 0, "Running backface stencil test");
- gen_stencil_test(f, &dsa->stencil[1], 0xff, mask_reg, fbS_reg, temp_reg);
- spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg);
- spe_release_register(f, temp_reg);
- }
+ gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg);
/* Generate code that, given the mask of valid fragments and the
* mask of valid fragments that passed the stencil test, computes
@@ -1698,9 +1619,6 @@ gen_stencil_depth_test(struct spe_function *f,
/* We may not need to calculate stencil values, if the writemask is off */
if (need_to_calculate_stencil_values) {
- unsigned int back_stencil_fail_values, back_stencil_pass_depth_fail_values, back_stencil_pass_depth_pass_values;
- unsigned int front_stencil_fail_values, front_stencil_pass_depth_fail_values, front_stencil_pass_depth_pass_values;
-
/* Generate code that calculates exactly which stencil values we need,
* without calculating the same value twice (say, if two different
* stencil ops have the same value). This code will work for one-sided
@@ -1715,51 +1633,11 @@ gen_stencil_depth_test(struct spe_function *f,
* This function will allocate a variant number of registers that
* will be released as part of the register set.
*/
- spe_comment(f, 0, "Computing stencil values");
- gen_get_stencil_values(f, dsa, fbS_reg,
- &front_stencil_fail_values, &front_stencil_pass_depth_fail_values,
- &front_stencil_pass_depth_pass_values, &back_stencil_fail_values,
- &back_stencil_pass_depth_fail_values, &back_stencil_pass_depth_pass_values);
-
- /* Tricky, tricky, tricky - the things we do to create optimal
- * code...
- *
- * The various stencil values registers may overlap with each other
- * and with fbS_reg arbitrarily (as any particular operation is
- * only calculated once and stored in one register, no matter
- * how many times it is used). So we can't change the values
- * within those registers directly - if we change a value in a
- * register that's being referenced by two different calculations,
- * we've just unwittingly changed the second value as well...
- *
- * Avoid this by allocating new registers to hold the results
- * (there may be 2, if the depth test is off, or 3, if it is on).
- * These will be released as part of the register set.
- */
- if (!dsa->stencil[1].enabled) {
- /* The easy case: if two-sided stenciling is *not* enabled, we
- * just use the front-sided values.
- */
- stencil_fail_values = front_stencil_fail_values;
- stencil_pass_depth_fail_values = front_stencil_pass_depth_fail_values;
- stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values;
- }
- else { /* two-sided stencil enabled */
- spe_comment(f, 0, "Resolving backface stencil values");
- /* Allocate new registers for the needed merged values */
- stencil_fail_values = spe_allocate_available_register(f);
- spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg);
- if (dsa->depth.enabled) {
- stencil_pass_depth_fail_values = spe_allocate_available_register(f);
- spe_selb(f, stencil_pass_depth_fail_values, front_stencil_pass_depth_fail_values, back_stencil_pass_depth_fail_values, facing_reg);
- }
- else {
- stencil_pass_depth_fail_values = fbS_reg;
- }
- stencil_pass_depth_pass_values = spe_allocate_available_register(f);
- spe_selb(f, stencil_pass_depth_pass_values, front_stencil_pass_depth_pass_values, back_stencil_pass_depth_pass_values, facing_reg);
- }
- }
+ spe_comment(f, 0, facing == CELL_FACING_FRONT ? "Computing front-facing stencil values" : "Computing back-facing stencil values");
+ gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg,
+ &stencil_fail_values, &stencil_pass_depth_fail_values,
+ &stencil_pass_depth_pass_values);
+ }
/* We now have all the stencil values we need. We also need
* the results of the depth test to figure out which
@@ -1896,10 +1774,12 @@ gen_stencil_depth_test(struct spe_function *f,
* should be much faster.
*
* \param cell the rendering context (in)
+ * \param facing whether the generated code is for front-facing or
+ * back-facing fragments
* \param f the generated function (out)
*/
void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
+cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f)
{
const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil;
const struct pipe_blend_state *blend = cell->blend;
@@ -1917,7 +1797,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
const int fragB_reg = 10; /* vector float */
const int fragA_reg = 11; /* vector float */
const int mask_reg = 12; /* vector uint */
- const int facing_reg = 13; /* uint */
+
+ ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK);
/* offset of quad from start of tile
* XXX assuming 4-byte pixels for color AND Z/stencil!!!!
@@ -1932,7 +1813,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
if (cell->debug_flags & CELL_DEBUG_ASM) {
spe_print_code(f, true);
spe_indent(f, 8);
- spe_comment(f, -4, "Begin per-fragment ops");
+ spe_comment(f, -4, facing == CELL_FACING_FRONT ? "Begin front-facing per-fragment ops": "Begin back-facing per-fragment ops");
}
spe_allocate_register(f, x_reg);
@@ -1945,7 +1826,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_allocate_register(f, fragB_reg);
spe_allocate_register(f, fragA_reg);
spe_allocate_register(f, mask_reg);
- spe_allocate_register(f, facing_reg);
quad_offset_reg = spe_allocate_available_register(f);
fbRGBA_reg = spe_allocate_available_register(f);
@@ -1969,6 +1849,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_release_register(f, y2_reg);
}
+ /* Generate the alpha test, if needed. */
if (dsa->alpha.enabled) {
gen_alpha_test(dsa, f, mask_reg, fragA_reg);
}
@@ -2095,7 +1976,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
* gen_stencil_depth_test() function must ignore the
* fbZ_reg register if depth is not enabled.
*/
- write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg);
+ write_depth_stencil = gen_stencil_depth_test(f, dsa, facing, mask_reg, fragZ_reg, fbZ_reg, fbS_reg);
}
else if (dsa->depth.enabled) {
int zmask_reg = spe_allocate_available_register(f);
@@ -2211,6 +2092,9 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_release_register(f, quad_offset_reg);
if (cell->debug_flags & CELL_DEBUG_ASM) {
- spe_comment(f, -4, "End per-fragment ops");
+ char buffer[1024]; /* holds the formatted text handed to spe_comment() below */
+ snprintf(buffer, sizeof(buffer), "End %s-facing per-fragment ops: %d instructions",
+ facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst);
+ spe_comment(f, -4, buffer);
}
}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
index b59de198dc..21b35d1faf 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
@@ -31,7 +31,7 @@
extern void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);
+cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f);
#endif /* CELL_GEN_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index 825110c62b..81efd137c7 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -35,6 +35,7 @@
#include "draw/draw_context.h"
#include "cell_context.h"
#include "cell_flush.h"
+#include "cell_pipe_state.h"
#include "cell_state.h"
#include "cell_texture.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index dd2d7f7d1e..031b27f11f 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -75,23 +75,29 @@ lookup_fragment_ops(struct cell_context *cell)
* If not found, create/save new fragment ops command.
*/
if (!ops) {
- struct spe_function spe_code;
+ struct spe_function spe_code_front, spe_code_back;
if (0)
debug_printf("**** Create New Fragment Ops\n");
/* Prepare the buffer that will hold the generated code. */
- spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ spe_init_func(&spe_code_front, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ spe_init_func(&spe_code_back, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
- /* generate new code */
- cell_gen_fragment_function(cell, &spe_code);
+ /* generate new code. Always generate new code for both front-facing
+ * and back-facing fragments, even if it's the same code in both
+ * cases.
+ */
+ cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
+ cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
/* alloc new fragment ops command */
ops = CALLOC_STRUCT(cell_command_fragment_ops);
/* populate the new cell_command_fragment_ops object */
ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
- memcpy(ops->code, spe_code.store, spe_code_size(&spe_code));
+ memcpy(ops->code_front, spe_code_front.store, spe_code_size(&spe_code_front));
+ memcpy(ops->code_back, spe_code_back.store, spe_code_size(&spe_code_back));
ops->dsa = *cell->depth_stencil;
ops->blend = *cell->blend;
@@ -99,7 +105,8 @@ lookup_fragment_ops(struct cell_context *cell)
util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
/* release rtasm buffer */
- spe_release_func(&spe_code);
+ spe_release_func(&spe_code_front);
+ spe_release_func(&spe_code_back);
}
else {
if (0)
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c
index 732c64082e..c9203fee08 100644
--- a/src/gallium/drivers/cell/ppu/cell_surface.c
+++ b/src/gallium/drivers/cell/ppu/cell_surface.c
@@ -27,6 +27,7 @@
#include "util/u_rect.h"
#include "cell_context.h"
+#include "cell_surface.h"
void
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index d726622d94..d5faf4e3aa 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -214,7 +214,8 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
/* Copy SPU code from batch buffer to spu buffer */
- memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
+ memcpy(spu.fragment_ops_code_front, fops->code_front, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
+ memcpy(spu.fragment_ops_code_back, fops->code_back, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
/* Copy state info (for fallback case only) */
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
@@ -234,7 +235,8 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
* raw state records that the fallback code requires.
*/
if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) {
- spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
+ spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) spu.fragment_ops_code_front;
+ spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) spu.fragment_ops_code_back;
}
else {
/* otherwise, the default fallback code remains in place */
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index c8bb251905..7033f6037d 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -63,7 +63,8 @@ one_time_init(void)
* This will normally be overriden by a code-gen'd function
* unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
*/
- spu.fragment_ops = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
}
@@ -90,7 +91,8 @@ main(main_param_t speid, main_param_t argp)
ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
ASSERT(sizeof(struct cell_command_render) % 8 == 0);
- ASSERT(((unsigned long) &spu.fragment_ops_code) % 8 == 0);
+ ASSERT(((unsigned long) &spu.fragment_ops_code_front) % 8 == 0);
+ ASSERT(((unsigned long) &spu.fragment_ops_code_back) % 8 == 0);
ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0);
one_time_init();
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 692790c9f3..24cf7d77ce 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -85,8 +85,7 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
vector float fragGreen,
vector float fragBlue,
vector float fragAlpha,
- vector unsigned int mask,
- uint facing);
+ vector unsigned int mask);
/** Function for running fragment program */
typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
@@ -170,9 +169,10 @@ struct spu_global
ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
/** Current fragment ops machine code, at 8-byte boundary */
- uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB;
- /** Current fragment ops function */
- spu_fragment_ops_func fragment_ops;
+ uint fragment_ops_code_front[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB;
+ uint fragment_ops_code_back[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB;
+ /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */
+ spu_fragment_ops_func fragment_ops[2];
/** Current fragment program machine code, at 8-byte boundary */
uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB;
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index f8ffc70492..683664e8a4 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -75,8 +75,7 @@ spu_fallback_fragment_ops(uint x, uint y,
vector float fragG,
vector float fragB,
vector float fragA,
- vector unsigned int mask,
- uint facing)
+ vector unsigned int mask)
{
vector float frag_aos[4];
unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
index a61689c83a..f817abf046 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
@@ -38,8 +38,7 @@ spu_fallback_fragment_ops(uint x, uint y,
vector float fragGreen,
vector float fragBlue,
vector float fragAlpha,
- vector unsigned int mask,
- uint facing);
+ vector unsigned int mask);
#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 5f908159bb..22e51a86ae 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -275,15 +275,20 @@ emit_quad( int x, int y, mask_t mask)
/* Execute per-fragment/quad operations, including:
* alpha test, z test, stencil test, blend and framebuffer writing.
+ * Note that there are two different fragment operations functions
+ * that can be called, one for front-facing fragments, and one
+ * for back-facing fragments. (Often the two are the same;
+ * but in some cases, like two-sided stenciling, they can be
+ * very different.) So choose the correct function depending
+ * on the calculated facing.
*/
- spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
+ spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
fragZ,
outputs[0*4+0],
outputs[0*4+1],
outputs[0*4+2],
outputs[0*4+3],
- mask,
- setup.facing);
+ mask);
}
}
}
@@ -519,7 +524,14 @@ setup_sort_vertices(const struct vertex_header *v0,
setup.oneOverArea = 1.0f / area;
- /* The product of area * sign indicates front/back orientation (0/1) */
+ /* The product of area * sign indicates front/back orientation (0/1).
+ * Just in case someone gets the bright idea of switching the front
+ * and back constants without noticing that we're assuming their
+ * values in this operation, also assert that the values are
+ * what we think they are.
+ */
+ ASSERT(CELL_FACING_FRONT == 0);
+ ASSERT(CELL_FACING_BACK == 1);
setup.facing = (area * sign > 0.0f)
^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);