summaryrefslogtreecommitdiff
path: root/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp')
-rw-r--r--src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp869
1 files changed, 869 insertions, 0 deletions
diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp
new file mode 100644
index 0000000000..615ce8c255
--- /dev/null
+++ b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp
@@ -0,0 +1,869 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "d3d1xstutil.h"
+#include "sm4.h"
+#include "tgsi/tgsi_ureg.h"
+#include <vector>
+
+#if 1
+#define check(x) assert(x)
+#define fail(x) assert(0 && (x))
+#else
+#define check(x) do {if(!(x)) throw(#x);} while(0)
+#define fail(x) throw(x)
+#endif
+
+struct tgsi_interpolation
+{
+ unsigned interpolation;
+ bool centroid;
+};
+
+static tgsi_interpolation sm4_to_pipe_interpolation[] =
+{
+ {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */
+ {TGSI_INTERPOLATE_CONSTANT, false},
+ {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */
+ {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */
+ {TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */
+ {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */
+
+ // Added in D3D10.1
+ {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */
+ {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */
+};
+
+static int sm4_to_pipe_sv[] =
+{
+ -1,
+ TGSI_SEMANTIC_POSITION,
+ -1, /*TGSI_SEMANTIC_CLIP_DISTANCE */
+ -1, /*TGSI_SEMANTIC_CULL_DISTANCE */
+ -1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */
+ -1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */
+ -1, /*TGSI_SEMANTIC_VERTEXID,*/
+ TGSI_SEMANTIC_PRIMID,
+ TGSI_SEMANTIC_INSTANCEID,
+ TGSI_SEMANTIC_FACE,
+ -1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/
+};
+
+struct sm4_to_tgsi_converter
+{
+ struct ureg_program* ureg;
+ std::vector<struct ureg_dst> temps;
+ std::vector<struct ureg_dst> outputs;
+ std::vector<struct ureg_src> inputs;
+ std::vector<struct ureg_src> samplers;
+ std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison
+ std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison
+ std::vector<std::pair<unsigned, unsigned> > loops;
+ sm4_insn* insn;
+ struct sm4_program& program;
+ std::vector<unsigned> sm4_to_tgsi_insn_num;
+ std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num;
+ bool in_sub;
+ bool avoid_txf;
+ bool avoid_int;
+
+ sm4_to_tgsi_converter(struct sm4_program& program)
+ : program(program)
+ {
+ avoid_txf = true;
+ avoid_int = false;
+ }
+
+ struct ureg_dst _reg(sm4_op& op)
+ {
+ switch(op.file)
+ {
+ case SM4_FILE_NULL:
+ {
+ struct ureg_dst d;
+ memset(&d, 0, sizeof(d));
+ d.File = TGSI_FILE_NULL;
+ return d;
+ }
+ case SM4_FILE_TEMP:
+ check(op.has_simple_index());
+ check(op.indices[0].disp < temps.size());
+ return temps[op.indices[0].disp];
+ case SM4_FILE_OUTPUT:
+ check(op.has_simple_index());
+ check(op.indices[0].disp < outputs.size());
+ return outputs[op.indices[0].disp];
+ default:
+ check(0);
+ return ureg_dst_undef();
+ }
+ }
+
+ struct ureg_dst _dst(unsigned i = 0)
+ {
+ check(i < insn->num_ops);
+ sm4_op& op = *insn->ops[i];
+ check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR);
+ struct ureg_dst d = ureg_writemask(_reg(op), op.mask);
+ if(insn->insn.sat)
+ d = ureg_saturate(d);
+ return d;
+ }
+
+ struct ureg_src _src(unsigned i)
+ {
+ check(i < insn->num_ops);
+ sm4_op& op = *insn->ops[i];
+ struct ureg_src s;
+ switch(op.file)
+ {
+ case SM4_FILE_IMMEDIATE32:
+ s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32);
+ break;
+ case SM4_FILE_INPUT:
+ check(op.is_index_simple(0));
+ check(op.num_indices == 1 || op.num_indices == 2);
+ // TODO: is this correct, or are incorrectly swapping the two indices in the GS case?
+ check(op.indices[op.num_indices - 1].disp < inputs.size());
+ s = inputs[op.indices[op.num_indices - 1].disp];
+ if(op.num_indices == 2)
+ {
+ s.Dimension = 1;
+ s.DimensionIndex = op.indices[0].disp;
+ }
+ break;
+ case SM4_FILE_CONSTANT_BUFFER:
+ // TODO: indirect addressing
+ check(op.num_indices == 2);
+ check(op.is_index_simple(0));
+ check(op.is_index_simple(1));
+ s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp);
+ s.Dimension = 1;
+ s.DimensionIndex = op.indices[0].disp;
+ break;
+ default:
+ s = ureg_src(_reg(op));
+ break;
+ }
+ if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR)
+ s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]);
+ else
+ {
+ /* immediates are masked to show needed values */
+ check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64);
+ }
+ if(op.abs)
+ s = ureg_abs(s);
+ if(op.neg)
+ s = ureg_negate(s);
+ return s;
+ };
+
+ int _idx(sm4_file file, unsigned i = 0)
+ {
+ check(i < insn->num_ops);
+ sm4_op& op = *insn->ops[i];
+ check(op.file == file);
+ check(op.has_simple_index());
+ return (int)op.indices[0].disp;
+ }
+
+ int _texslot(bool have_sampler = true)
+ {
+ std::map<std::pair<int, int>, int>::iterator i;
+ i = program.resource_sampler_to_slot.find(std::make_pair(_idx(SM4_FILE_RESOURCE, 2), have_sampler ? _idx(SM4_FILE_SAMPLER, 3) : -1));
+ check(i != program.resource_sampler_to_slot.end());
+ return i->second;
+ }
+
+ unsigned tex_target(unsigned texslot)
+ {
+ unsigned mode = sampler_modes[program.slot_to_sampler[texslot]];
+ unsigned target;
+ if(mode)
+ target = targets[program.slot_to_resource[texslot]].second;
+ else
+ target = targets[program.slot_to_resource[texslot]].first;
+ check(target);
+ return target;
+ }
+
+ std::vector<struct ureg_dst> insn_tmps;
+
+ struct ureg_dst _tmp()
+ {
+ struct ureg_dst t = ureg_DECL_temporary(ureg);
+ insn_tmps.push_back(t);
+ return t;
+ }
+
+ struct ureg_dst _tmp(struct ureg_dst d)
+ {
+ if(d.File == TGSI_FILE_TEMPORARY)
+ return d;
+ else
+ return ureg_writemask(_tmp(), d.WriteMask);
+ }
+
+#define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break
+#define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break
+#define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break
+#define OP1(n) OP1_(n, n)
+#define OP2(n) OP2_(n, n)
+#define OP3(n) OP3_(n, n)
+#define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break;
+
+ void translate_insns(unsigned begin, unsigned end)
+ {
+ for(unsigned insn_num = begin; insn_num < end; ++insn_num)
+ {
+ sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg);
+ unsigned label;
+ insn = program.insns[insn_num];
+ bool ok;
+ ok = true;
+ switch(insn->opcode)
+ {
+ // trivial instructions
+ case SM4_OPCODE_NOP:
+ break;
+ OP1(MOV);
+
+ // float
+ OP2(ADD);
+ OP2(MUL);
+ OP3(MAD);
+ OP2(DIV);
+ OP1(FRC);
+ OP1(RCP);
+ OP2(MIN);
+ OP2(MAX);
+ OP2_(LT, SLT);
+ OP2_(GE, SGE);
+ OP2_(EQ, SEQ);
+ OP2_(NE, SNE);
+
+ // bitwise
+ OP1(NOT);
+ OP2(AND);
+ OP2(OR);
+ OP2(XOR);
+
+ // special mathematical
+ OP2(DP2);
+ OP2(DP3);
+ OP2(DP4);
+ OP1(RSQ);
+ OP1_(LOG, LG2);
+ OP1_(EXP, EX2);
+
+ // rounding
+ OP1_(ROUND_NE, ROUND);
+ OP1_(ROUND_Z, TRUNC);
+ OP1_(ROUND_PI, CEIL);
+ OP1_(ROUND_NI, FLR);
+
+ // cross-thread
+ OP1_(DERIV_RTX, DDX);
+ OP1_(DERIV_RTX_COARSE, DDX);
+ OP1_(DERIV_RTX_FINE, DDX);
+ OP1_(DERIV_RTY, DDY);
+ OP1_(DERIV_RTY_COARSE, DDY);
+ OP1_(DERIV_RTY_FINE, DDY);
+ case SM4_OPCODE_EMIT:
+ ureg_EMIT(ureg);
+ break;
+ case SM4_OPCODE_CUT:
+ ureg_ENDPRIM(ureg);
+ break;
+ case SM4_OPCODE_EMITTHENCUT:
+ ureg_EMIT(ureg);
+ ureg_ENDPRIM(ureg);
+ break;
+
+ // non-trivial instructions
+ case SM4_OPCODE_MOVC:
+ /* CMP checks for < 0, but MOVC checks for != 0
+ * but fortunately, x != 0 is equivalent to -abs(x) < 0
+ * XXX: can test_nz apply to this?!
+ */
+ ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3));
+ break;
+ case SM4_OPCODE_SQRT:
+ {
+ struct ureg_dst d = _dst();
+ struct ureg_dst t = _tmp(d);
+ ureg_RSQ(ureg, t, _src(1));
+ ureg_RCP(ureg, d, ureg_src(t));
+ break;
+ }
+ case SM4_OPCODE_SINCOS:
+ {
+ struct ureg_dst s = _dst(0);
+ struct ureg_dst c = _dst(1);
+ struct ureg_src v = _src(2);
+ if(s.File != TGSI_FILE_NULL)
+ ureg_SIN(ureg, s, v);
+ if(c.File != TGSI_FILE_NULL)
+ ureg_COS(ureg, c, v);
+ break;
+ }
+
+ // control flow
+ case SM4_OPCODE_DISCARD:
+ ureg_KIL(ureg, _src(0));
+ break;
+ OP_CF(LOOP, BGNLOOP);
+ OP_CF(ENDLOOP, ENDLOOP);
+ case SM4_OPCODE_BREAK:
+ ureg_BRK(ureg);
+ break;
+ case SM4_OPCODE_BREAKC:
+ // XXX: can test_nz apply to this?!
+ ureg_BREAKC(ureg, _src(0));
+ break;
+ case SM4_OPCODE_CONTINUE:
+ ureg_CONT(ureg);
+ break;
+ case SM4_OPCODE_CONTINUEC:
+ // XXX: can test_nz apply to this?!
+ ureg_IF(ureg, _src(0), &label);
+ ureg_CONT(ureg);
+ ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
+ ureg_ENDIF(ureg);
+ break;
+ case SM4_OPCODE_SWITCH:
+ ureg_SWITCH(ureg, _src(0));
+ break;
+ case SM4_OPCODE_CASE:
+ ureg_CASE(ureg, _src(0));
+ break;
+ case SM4_OPCODE_DEFAULT:
+ ureg_DEFAULT(ureg);
+ break;
+ case SM4_OPCODE_ENDSWITCH:
+ ureg_ENDSWITCH(ureg);
+ break;
+ case SM4_OPCODE_CALL:
+ ureg_CAL(ureg, &label);
+ label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)]));
+ break;
+ case SM4_OPCODE_LABEL:
+ if(in_sub)
+ ureg_ENDSUB(ureg);
+ else
+ ureg_END(ureg);
+ ureg_BGNSUB(ureg);
+ in_sub = true;
+ break;
+ case SM4_OPCODE_RET:
+ if(in_sub || insn_num != (program.insns.size() - 1))
+ ureg_RET(ureg);
+ break;
+ case SM4_OPCODE_RETC:
+ ureg_IF(ureg, _src(0), &label);
+ if(insn->insn.test_nz)
+ ureg_RET(ureg);
+ ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
+ if(!insn->insn.test_nz)
+ {
+ ureg_ELSE(ureg, &label);
+ ureg_RET(ureg);
+ ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
+ }
+ ureg_ENDIF(ureg);
+ break;
+ OP_CF(ELSE, ELSE);
+ case SM4_OPCODE_ENDIF:
+ ureg_ENDIF(ureg);
+ break;
+ case SM4_OPCODE_IF:
+ if(insn->insn.test_nz)
+ {
+ ureg_IF(ureg, _src(0), &label);
+ label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num]));
+ }
+ else
+ {
+ unsigned linked = program.cf_insn_linked[insn_num];
+ if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF)
+ {
+ ureg_IF(ureg, _src(0), &label);
+ ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
+ ureg_ELSE(ureg, &label);
+ label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
+ }
+ else
+ {
+ /* we have to swap the branches in this case (fun!)
+ * TODO: maybe just emit a SEQ 0?
+ * */
+ unsigned endif = program.cf_insn_linked[linked];
+
+ ureg_IF(ureg, _src(0), &label);
+ label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
+
+ translate_insns(linked + 1, endif);
+
+ sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg);
+ ureg_ELSE(ureg, &label);
+ label_to_sm4_insn_num.push_back(std::make_pair(label, endif));
+
+ translate_insns(insn_num + 1, linked);
+
+ insn_num = endif - 1;
+ goto next;
+ }
+ }
+ break;
+ case SM4_OPCODE_RESINFO:
+ {
+ std::map<int, int>::iterator i;
+ i = program.resource_to_slot.find(_idx(SM4_FILE_RESOURCE, 2));
+ check(i != program.resource_to_slot.end());
+ unsigned texslot = i->second;
+
+ // no driver actually provides this, unfortunately
+ ureg_TXQ(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]);
+ break;
+ };
+ // TODO: sample offset, sample index
+ case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg (ouch)
+ case SM4_OPCODE_LD_MS:
+ {
+ unsigned texslot = _texslot(false);
+ unsigned dim = 0;
+ switch(targets[texslot].first)
+ {
+ case TGSI_TEXTURE_1D:
+ dim = 1;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ dim = 2;
+ break;
+ case TGSI_TEXTURE_3D:
+ dim = 3;
+ break;
+ default:
+ check(0);
+ }
+ struct ureg_dst tmp = _tmp();
+ if(avoid_txf)
+ {
+ struct ureg_src texcoord;
+ if(!avoid_int)
+ {
+ ureg_I2F(ureg, tmp, _src(1));
+ texcoord = ureg_src(tmp);
+ }
+ else
+ texcoord = _src(1);
+
+ ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_swizzle(texcoord, 0, 1, 2, dim), samplers[texslot]);
+ }
+ else
+ ureg_TXF(ureg, _dst(), tex_target(texslot), ureg_swizzle(_src(1), 0, 1, 2, dim), samplers[texslot]);
+ break;
+ }
+ case SM4_OPCODE_SAMPLE: // dst, coord, res, samp
+ {
+ unsigned texslot = _texslot();
+ ureg_TEX(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]);
+ break;
+ }
+ case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x
+ {
+ unsigned texslot = _texslot();
+ struct ureg_dst tmp = _tmp();
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
+ ureg_TXB(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
+ break;
+ }
+ case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x
+ {
+ unsigned texslot = _texslot();
+ struct ureg_dst tmp = _tmp();
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1));
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0));
+ ureg_TEX(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
+ break;
+ }
+ case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x
+ {
+ unsigned texslot = _texslot();
+ struct ureg_dst tmp = _tmp();
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1));
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0));
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 0.0));
+ ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
+ break;
+ }
+ case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy
+ {
+ unsigned texslot = _texslot();
+ ureg_TXD(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot], _src(4), _src(5));
+ break;
+ }
+ case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x
+ {
+ unsigned texslot = _texslot();
+ struct ureg_dst tmp = _tmp();
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
+ ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
+ break;
+ }
+ default:
+ ok = false;
+ break;
+ }
+
+ if(!ok && !avoid_int)
+ {
+ ok = true;
+ switch(insn->opcode)
+ {
+ // integer
+ OP1_(ITOF, I2F);
+ OP1_(FTOI, F2I);
+ OP2_(IADD, UADD);
+ OP1(INEG);
+ OP2_(IMUL, UMUL);
+ OP3_(IMAD, UMAD);
+ OP2_(ISHL, SHL);
+ OP2_(ISHR, ISHR);
+ OP2(IMIN);
+ OP2(IMAX);
+ OP2_(ILT, ISLT);
+ OP2_(IGE, ISGE);
+ OP2_(IEQ, USEQ);
+ OP2_(INE, USNE);
+
+ // unsigned
+ OP1_(UTOF, U2F);
+ OP1_(FTOU, F2U);
+ OP2(UMUL);
+ OP3(UMAD);
+ OP2(UMIN);
+ OP2(UMAX);
+ OP2_(ULT, USLT);
+ OP2_(UGE, USGE);
+ OP2(USHR);
+
+ case SM4_OPCODE_UDIV:
+ {
+ struct ureg_dst q = _dst(0);
+ struct ureg_dst r = _dst(1);
+ struct ureg_src a = _src(2);
+ struct ureg_src b = _src(3);
+ if(q.File != TGSI_FILE_NULL)
+ ureg_UDIV(ureg, q, a, b);
+ if(r.File != TGSI_FILE_NULL)
+ ureg_UMOD(ureg, r, a, b);
+ break;
+ }
+ default:
+ ok = false;
+ }
+ }
+
+ if(!ok && avoid_int)
+ {
+ ok = true;
+ switch(insn->opcode)
+ {
+ case SM4_OPCODE_ITOF:
+ case SM4_OPCODE_UTOF:
+ break;
+ OP1_(FTOI, TRUNC);
+ OP1_(FTOU, FLR);
+ // integer
+ OP2_(IADD, ADD);
+ OP2_(IMUL, MUL);
+ OP3_(IMAD, MAD);
+ OP2_(MIN, MIN);
+ OP2_(MAX, MAX);
+ OP2_(ILT, SLT);
+ OP2_(IGE, SGE);
+ OP2_(IEQ, SEQ);
+ OP2_(INE, SNE);
+
+ // unsigned
+ OP2_(UMUL, MUL);
+ OP3_(UMAD, MAD);
+ OP2_(UMIN, MIN);
+ OP2_(UMAX, MAX);
+ OP2_(ULT, SLT);
+ OP2_(UGE, SGE);
+
+ case SM4_OPCODE_INEG:
+ ureg_MOV(ureg, _dst(), ureg_negate(_src(1)));
+ break;
+ case SM4_OPCODE_ISHL:
+ {
+ struct ureg_dst d = _dst();
+ struct ureg_dst t = _tmp(d);
+ ureg_EX2(ureg, t, _src(2));
+ ureg_MUL(ureg, d, ureg_src(t), _src(1));
+ break;
+ }
+ case SM4_OPCODE_ISHR:
+ case SM4_OPCODE_USHR:
+ {
+ struct ureg_dst d = _dst();
+ struct ureg_dst t = _tmp(d);
+ ureg_EX2(ureg, t, ureg_negate(_src(2)));
+ ureg_MUL(ureg, t, ureg_src(t), _src(1));
+ ureg_FLR(ureg, d, ureg_src(t));
+ break;
+ }
+ case SM4_OPCODE_UDIV:
+ {
+ struct ureg_dst q = _dst(0);
+ struct ureg_dst r = _dst(1);
+ struct ureg_src a = _src(2);
+ struct ureg_src b = _src(3);
+ struct ureg_dst f = _tmp();
+ ureg_DIV(ureg, f, a, b);
+ if(q.File != TGSI_FILE_NULL)
+ ureg_FLR(ureg, q, ureg_src(f));
+ if(r.File != TGSI_FILE_NULL)
+ {
+ ureg_FRC(ureg, f, ureg_src(f));
+ ureg_MUL(ureg, r, ureg_src(f), b);
+ }
+ break;
+ }
+ default:
+ ok = false;
+ }
+ }
+
+ check(ok);
+
+ if(!insn_tmps.empty())
+ {
+ for(unsigned i = 0; i < insn_tmps.size(); ++i)
+ ureg_release_temporary(ureg, insn_tmps[i]);
+ insn_tmps.clear();
+ }
+next:;
+ }
+ }
+
+ void* do_translate()
+ {
+ unsigned processor;
+ switch(program.version.type)
+ {
+ case 0:
+ processor = TGSI_PROCESSOR_FRAGMENT;
+ break;
+ case 1:
+ processor = TGSI_PROCESSOR_VERTEX;
+ break;
+ case 2:
+ processor = TGSI_PROCESSOR_GEOMETRY;
+ break;
+ default:
+ fail("Tessellation and compute shaders not yet supported");
+ return 0;
+ }
+
+ if(!sm4_link_cf_insns(program))
+ fail("Malformed control flow");
+ if(!sm4_find_labels(program))
+ fail("Failed to locate labels");
+ if(!sm4_allocate_resource_sampler_pairs(program))
+ fail("Unsupported (indirect?) accesses to resources and/or samplers");
+
+ ureg = ureg_create(processor);
+
+ in_sub = false;
+
+ for(unsigned i = 0; i < program.slot_to_resource.size(); ++i)
+ samplers.push_back(ureg_DECL_sampler(ureg, i));
+
+ sm4_to_tgsi_insn_num.resize(program.insns.size());
+ for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num)
+ {
+ sm4_dcl& dcl = *program.dcls[insn_num];
+ int idx = -1;
+ if(dcl.op.get() && dcl.op->is_index_simple(0))
+ idx = dcl.op->indices[0].disp;
+ switch(dcl.opcode)
+ {
+ case SM4_OPCODE_DCL_GLOBAL_FLAGS:
+ break;
+ case SM4_OPCODE_DCL_TEMPS:
+ for(unsigned i = 0; i < dcl.num; ++i)
+ temps.push_back(ureg_DECL_temporary(ureg));
+ break;
+ case SM4_OPCODE_DCL_INPUT:
+ check(idx >= 0);
+ if(processor == TGSI_PROCESSOR_VERTEX)
+ {
+ if(inputs.size() <= (unsigned)idx)
+ inputs.resize(idx + 1);
+ inputs[idx] = ureg_DECL_vs_input(ureg, idx);
+ }
+ else if(processor == TGSI_PROCESSOR_GEOMETRY)
+ {
+ // TODO: is this correct?
+ unsigned gsidx = dcl.op->indices[1].disp;
+ if(inputs.size() <= (unsigned)gsidx)
+ inputs.resize(gsidx + 1);
+ inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx);
+ }
+ else
+ check(0);
+ break;
+ case SM4_OPCODE_DCL_INPUT_PS:
+ check(idx >= 0);
+ if(inputs.size() <= (unsigned)idx)
+ inputs.resize(idx + 1);
+ inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid);
+ break;
+ case SM4_OPCODE_DCL_OUTPUT:
+ check(idx >= 0);
+ if(outputs.size() <= (unsigned)idx)
+ outputs.resize(idx + 1);
+ if(processor == TGSI_PROCESSOR_FRAGMENT)
+ outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx);
+ else
+ outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx);
+ break;
+ case SM4_OPCODE_DCL_INPUT_SIV:
+ case SM4_OPCODE_DCL_INPUT_SGV:
+ case SM4_OPCODE_DCL_INPUT_PS_SIV:
+ case SM4_OPCODE_DCL_INPUT_PS_SGV:
+ check(idx >= 0);
+ if(inputs.size() <= (unsigned)idx)
+ inputs.resize(idx + 1);
+ // TODO: is this correct?
+ inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0);
+ break;
+ case SM4_OPCODE_DCL_OUTPUT_SIV:
+ case SM4_OPCODE_DCL_OUTPUT_SGV:
+ check(idx >= 0);
+ if(outputs.size() <= (unsigned)idx)
+ outputs.resize(idx + 1);
+ check(sm4_to_pipe_sv[dcl.sv] >= 0);
+ outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0);
+ break;
+ case SM4_OPCODE_DCL_RESOURCE:
+ check(idx >= 0);
+ if(targets.size() <= (unsigned)idx)
+ targets.resize(idx + 1);
+ switch(dcl.dcl_resource.target)
+ {
+ case SM4_TARGET_TEXTURE1D:
+ targets[idx].first = TGSI_TEXTURE_1D;
+ targets[idx].second = TGSI_TEXTURE_SHADOW1D;
+ break;
+ case SM4_TARGET_TEXTURE2D:
+ targets[idx].first = TGSI_TEXTURE_2D;
+ targets[idx].second = TGSI_TEXTURE_SHADOW2D;
+ break;
+ case SM4_TARGET_TEXTURE3D:
+ targets[idx].first = TGSI_TEXTURE_3D;
+ targets[idx].second = 0;
+ break;
+ case SM4_TARGET_TEXTURECUBE:
+ targets[idx].first = TGSI_TEXTURE_CUBE;
+ targets[idx].second = 0;
+ break;
+ default:
+ // HACK to make SimpleSample10 work
+ //check(0);
+ targets[idx].first = TGSI_TEXTURE_2D;
+ targets[idx].second = TGSI_TEXTURE_SHADOW2D;
+ break;
+ }
+ break;
+ case SM4_OPCODE_DCL_SAMPLER:
+ check(idx >= 0);
+ if(sampler_modes.size() <= (unsigned)idx)
+ sampler_modes.resize(idx + 1);
+ check(!dcl.dcl_sampler.mono);
+ sampler_modes[idx] = dcl.dcl_sampler.shadow;
+ break;
+ case SM4_OPCODE_DCL_CONSTANT_BUFFER:
+ check(dcl.op->num_indices == 2);
+ check(dcl.op->is_index_simple(0));
+ check(dcl.op->is_index_simple(1));
+ idx = dcl.op->indices[0].disp;
+ ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx);
+ break;
+ case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
+ ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]);
+ break;
+ case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
+ ureg_property_gs_output_prim(ureg, d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]);
+ break;
+ case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
+ ureg_property_gs_max_vertices(ureg, dcl.num);
+ break;
+ default:
+ check(0);
+ }
+ }
+
+ translate_insns(0, program.insns.size());
+ sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg));
+ if(in_sub)
+ ureg_ENDSUB(ureg);
+ else
+ ureg_END(ureg);
+
+ for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i)
+ ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]);
+
+ const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0);
+ ureg_destroy(ureg);
+ return (void*)tokens;
+ }
+
+ void* translate()
+ {
+ try
+ {
+ return do_translate();
+ }
+ catch(const char*)
+ {
+ return 0;
+ }
+ }
+};
+
+void* sm4_to_tgsi(struct sm4_program& program)
+{
+ sm4_to_tgsi_converter conv(program);
+ return conv.translate();
+}