summary | refs | log | tree | commit | diff
path: root/src/gallium
diff options
context:
space:
mode:
author: Keith Whitwell <keith@tungstengraphics.com> 2008-05-22 13:46:06 +0100
committer: Keith Whitwell <keith@tungstengraphics.com> 2008-05-23 09:16:57 +0100
commit: 05029c919d46299ca259ee8af880d0a65f95ce7c (patch)
tree: 1dfe178cc278b0b1a7f1ac272257e68485493594 /src/gallium
parent: c684ffa02d8d43ee04b99ee63ccd1adb66e81c1a (diff)
draw: clean up masked writes in aos sse, make some xmm function names clearer
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.c 236
1 files changed, 82 insertions, 154 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 0b8600696a..708ecadbac 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -171,7 +171,7 @@ static boolean is_xmm_tmp( struct aos_compilation *cp,
cp->xmm[reg.idx].file == TGSI_FILE_NULL);
}
-static struct x86_reg get_xmm_tmp( struct aos_compilation *cp,
+static struct x86_reg get_xmm_clone( struct aos_compilation *cp,
struct x86_reg reg )
{
if (!is_xmm_tmp(cp, reg)) {
@@ -380,31 +380,37 @@ static void emit_pshufd( struct aos_compilation *cp,
}
}
-
-
-
-/* Helper for writemask:
+/* load masks (pack into negs??)
+ * pshufd - shuffle according to writemask
+ * and - result, mask
+ * nand - dest, mask
+ * or - dest, result
*/
-static boolean emit_shuf_copy1( struct aos_compilation *cp,
- struct x86_reg dst,
- struct x86_reg arg0,
- struct x86_reg arg1,
- ubyte shuf )
+static boolean mask_write( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg result,
+ unsigned mask )
{
+ struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movups(cp->func, dst, arg1);
- emit_pshufd(cp, dst, dst, shuf);
- emit_pshufd(cp, tmp, arg0, shuf);
-
- sse_movss(cp->func, dst, tmp);
+
+ emit_pshufd(cp, tmp, imm_swz,
+ SHUF((mask & 1) ? 2 : 3,
+ (mask & 2) ? 2 : 3,
+ (mask & 4) ? 2 : 3,
+ (mask & 8) ? 2 : 3));
- emit_pshufd(cp, dst, dst, shuf);
+ sse_andps(cp->func, dst, tmp);
+ sse_andnps(cp->func, tmp, result);
+ sse_orps(cp->func, dst, tmp);
aos_release_xmm_reg(cp, tmp.idx);
return TRUE;
}
+
+
/* Helper for writemask:
*/
static boolean emit_shuf_copy2( struct aos_compilation *cp,
@@ -414,17 +420,18 @@ static boolean emit_shuf_copy2( struct aos_compilation *cp,
ubyte shuf )
{
struct x86_reg tmp = aos_get_xmm_reg(cp);
+
emit_pshufd(cp, dst, arg1, shuf);
emit_pshufd(cp, tmp, arg0, shuf);
-
sse_shufps(cp->func, dst, tmp, SHUF(X, Y, Z, W));
-
emit_pshufd(cp, dst, dst, shuf);
aos_release_xmm_reg(cp, tmp.idx);
return TRUE;
}
+
+
#define SSE_SWIZZLE_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6))
@@ -593,131 +600,58 @@ static void store_dest( struct aos_compilation *cp,
const struct tgsi_full_dst_register *reg,
struct x86_reg result )
{
- if (reg->DstRegister.WriteMask == 0)
- {
- return;
- }
- else if (reg->DstRegister.WriteMask == TGSI_WRITEMASK_XYZW)
- {
- if (result.file == file_XMM) {
- aos_adopt_xmm_reg(cp,
- result,
- reg->DstRegister.File,
- reg->DstRegister.Index,
- TRUE);
- }
- else {
- struct x86_reg dst = aos_get_xmm_reg(cp);
- aos_adopt_xmm_reg(cp,
- dst,
- reg->DstRegister.File,
- reg->DstRegister.Index,
- TRUE);
- sse_movups(cp->func, dst, result);
- }
- }
- else
- {
- /* Previous value of the dest register:
- */
- struct x86_reg old_dst = aos_get_shader_reg(cp,
- reg->DstRegister.File,
- reg->DstRegister.Index);
-
-
- /* Alloc an xmm reg to hold the new value of the dest register:
- */
- struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg dst;
+ switch (reg->DstRegister.WriteMask) {
+ case 0:
+ return;
+
+ case TGSI_WRITEMASK_XYZW:
aos_adopt_xmm_reg(cp,
- dst,
+ get_xmm_clone(cp, result),
reg->DstRegister.File,
reg->DstRegister.Index,
- TRUE );
-
- switch (reg->DstRegister.WriteMask) {
- case TGSI_WRITEMASK_X:
- if (result.file == file_XMM) {
- sse_movups(cp->func, dst, old_dst);
- sse_movss(cp->func, dst, result);
- }
- else {
- struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movups(cp->func, dst, old_dst);
- sse_movss(cp->func, tmp, result);
- sse_movss(cp->func, dst, tmp);
- aos_release_xmm_reg(cp, tmp.idx);
- }
- break;
-
- case TGSI_WRITEMASK_XY:
- sse_movups(cp->func, dst, old_dst);
- sse_shufps(cp->func, dst, result, SHUF(X, Y, Z, W));
- break;
-
- case TGSI_WRITEMASK_ZW:
- sse_movups(cp->func, dst, result);
- sse_shufps(cp->func, dst, old_dst, SHUF(X, Y, Z, W));
- break;
-
- case TGSI_WRITEMASK_YZW:
- if (old_dst.file == file_XMM) {
- sse_movups(cp->func, dst, result);
- sse_movss(cp->func, dst, old_dst);
- }
- else {
- struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movups(cp->func, dst, result);
- sse_movss(cp->func, tmp, old_dst);
- sse_movss(cp->func, dst, tmp);
- aos_release_xmm_reg(cp, tmp.idx);
- }
- break;
-
- case TGSI_WRITEMASK_Y:
- emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Y,X,Z,W));
- break;
-
- case TGSI_WRITEMASK_Z:
- emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Z,Y,X,W));
- break;
-
- case TGSI_WRITEMASK_W:
- emit_shuf_copy1(cp, dst, result, old_dst, SHUF(W,Y,Z,X));
- break;
-
- case TGSI_WRITEMASK_XZ:
- emit_shuf_copy2(cp, dst, result, old_dst, SHUF(X,Z,Y,W));
- break;
+ TRUE);
+ return;
+ default:
+ break;
+ }
- case TGSI_WRITEMASK_XW:
- emit_shuf_copy2(cp, dst, result, old_dst, SHUF(X,W,Z,Y));
+ dst = aos_get_shader_reg_xmm(cp,
+ reg->DstRegister.File,
+ reg->DstRegister.Index);
- case TGSI_WRITEMASK_YZ:
- emit_shuf_copy2(cp, dst, result, old_dst, SHUF(Z,Y,X,W));
- break;
+ switch (reg->DstRegister.WriteMask) {
+ case TGSI_WRITEMASK_X:
+ sse_movss(cp->func, dst, get_xmm_clone(cp, result));
+ break;
+
+ case TGSI_WRITEMASK_XY:
+ sse_shufps(cp->func, dst, get_xmm_clone(cp, result), SHUF(X, Y, Z, W));
+ break;
- case TGSI_WRITEMASK_YW:
- emit_shuf_copy2(cp, dst, result, old_dst, SHUF(W,Y,Z,X));
- break;
+ case TGSI_WRITEMASK_ZW:
+ result = get_xmm_clone(cp, result);
+ sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W));
+ dst = result;
+ break;
- case TGSI_WRITEMASK_XZW:
- emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Y,X,Z,W));
- break;
+ case TGSI_WRITEMASK_YZW:
+ sse_movss(cp->func, result, dst);
+ dst = result;
+ break;
- case TGSI_WRITEMASK_XYW:
- emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Z,Y,X,W));
- break;
+ default:
+ mask_write(cp, dst, result, reg->DstRegister.WriteMask);
+ break;
+ }
- case TGSI_WRITEMASK_XYZ:
- emit_shuf_copy1(cp, dst, old_dst, result, SHUF(W,Y,Z,X));
- break;
+ aos_adopt_xmm_reg(cp,
+ dst,
+ reg->DstRegister.File,
+ reg->DstRegister.Index,
+ TRUE);
- default:
- assert(0); /* not possible */
- break;
- }
- }
}
@@ -837,7 +771,7 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, neg);
sse_maxps(cp->func, dst, arg0);
@@ -850,7 +784,7 @@ static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_addps(cp->func, dst, arg1);
@@ -874,7 +808,7 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -898,7 +832,7 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -920,7 +854,7 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -1216,7 +1150,7 @@ static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_maxps(cp->func, dst, arg1);
@@ -1229,7 +1163,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_minps(cp->func, dst, arg1);
@@ -1240,7 +1174,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
/* potentially nothing to do */
@@ -1252,7 +1186,7 @@ static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -1270,7 +1204,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
/* If we can't clobber old contents of arg0, get a temporary & copy
* it there, then clobber it...
*/
- arg0 = get_xmm_tmp(cp, arg0);
+ arg0 = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, arg0, arg1);
sse_addps(cp->func, arg0, arg2);
@@ -1336,7 +1270,7 @@ static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_cmpps(cp->func, dst, arg1, cc_NotLessThan);
sse_andps(cp->func, dst, ones);
@@ -1360,7 +1294,7 @@ static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_cmpps(cp->func, dst, arg1, cc_LessThan);
sse_andps(cp->func, dst, ones);
@@ -1373,7 +1307,7 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_subps(cp->func, dst, arg1);
@@ -1526,9 +1460,9 @@ emit_instruction( struct aos_compilation *cp,
static boolean emit_viewport( struct aos_compilation *cp )
{
- struct x86_reg pos = aos_get_shader_reg(cp,
- TGSI_FILE_OUTPUT,
- 0);
+ struct x86_reg pos = aos_get_shader_reg_xmm(cp,
+ TGSI_FILE_OUTPUT,
+ 0);
struct x86_reg scale = x86_make_disp(cp->machine_EDX,
Offset(struct aos_machine, scale));
@@ -1536,12 +1470,6 @@ static boolean emit_viewport( struct aos_compilation *cp )
struct x86_reg translate = x86_make_disp(cp->machine_EDX,
Offset(struct aos_machine, translate));
- if (pos.file != file_XMM) {
- struct x86_reg dst = aos_get_xmm_reg(cp);
- sse_movups(cp->func, dst, pos);
- pos = dst;
- }
-
sse_mulps(cp->func, pos, scale);
sse_addps(cp->func, pos, translate);