/**************************************************************************
 *
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

 /*
  * Authors:
  *   Zack Rusin zack@tungstengraphics.com
  */
#ifdef MESA_LLVM

#include "gallivm.h"

#include "instructions.h"
#include "loweringpass.h"
#include "storage.h"

#include "pipe/p_context.h"
#include "pipe/p_shader_tokens.h"

#include "pipe/tgsi/util/tgsi_parse.h"
#include "pipe/tgsi/exec/tgsi_exec.h"
#include "pipe/tgsi/util/tgsi_util.h"
#include "pipe/tgsi/util/tgsi_build.h"
#include "pipe/tgsi/util/tgsi_dump.h"

/*
 * NOTE(review): the 22 angle-bracket header names below were stripped from
 * this copy of the file (only bare "#include" tokens survived).  The list is
 * reconstructed from the LLVM 2.x / standard-library names the code uses;
 * verify it against the upstream revision before merging.
 */
#include <llvm/Module.h>
#include <llvm/CallingConv.h>
#include <llvm/Constants.h>
#include <llvm/DerivedTypes.h>
#include <llvm/Instructions.h>
#include <llvm/ModuleProvider.h>
#include <llvm/Pass.h>
#include <llvm/PassManager.h>
#include <llvm/ParameterAttributes.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/ExecutionEngine/Interpreter.h>
#include <llvm/ExecutionEngine/GenericValue.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/LinkAllPasses.h>
#include <llvm/Analysis/Verifier.h>
#include <llvm/Analysis/LoopPass.h>
#include <llvm/Target/TargetData.h>
#include <llvm/Bitcode/ReaderWriter.h>

#include <sstream>
#include <fstream>
#include <iostream>

/* One (attribute, channel) pair of a fragment shader input together with
 * the TGSI interpolation mode that has to be applied to it. */
struct gallivm_interpolate {
   int attrib;
   int chan;
   int type;
};

/* A shader translated to LLVM: the module holding it, the JIT-compiled
 * entry point (set by the cpu engine functions) and the list of input
 * interpolators collected from the TGSI declarations. */
struct gallivm_prog {
   llvm::Module *module;
   void *function;
   int   num_consts;
   int   id;
   enum gallivm_shader_type type;

   struct gallivm_interpolate interpolators[32*4]; //FIXME: this might not be enough for some shaders
   int   num_interp;
};

struct gallivm_cpu_engine {
   llvm::ExecutionEngine *engine;
};

using namespace llvm;
#include "llvm_base_shader.cpp"

/* Counter used to give every gallivm_prog a distinct id. */
static int GLOBAL_ID = 0;

/* Registers the lowering pass followed by the optimization pipeline on the
 * given pass manager.  The order of the passes is significant. */
static inline void AddStandardCompilePasses(PassManager &PM) {
   PM.add(new LoweringPass());
   PM.add(createVerifierPass());                  // Verify that input is correct

   PM.add(createLowerSetJmpPass());          // Lower llvm.setjmp/.longjmp

   //PM.add(createStripSymbolsPass(true));

   PM.add(createRaiseAllocationsPass());     // call %malloc -> malloc inst
   PM.add(createCFGSimplificationPass());    // Clean up disgusting code
   PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas
   PM.add(createGlobalOptimizerPass());      // Optimize out global vars
   PM.add(createGlobalDCEPass());            // Remove unused fns and globs
   PM.add(createIPConstantPropagationPass());// IP Constant Propagation
   PM.add(createDeadArgEliminationPass());   // Dead argument elimination
   PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
   PM.add(createCFGSimplificationPass());    // Clean up after IPCP & DAE

   PM.add(createPruneEHPass());              // Remove dead EH info

   PM.add(createFunctionInliningPass());     // Inline small functions
   PM.add(createArgumentPromotionPass());    // Scalarize uninlined fn args

   PM.add(createTailDuplicationPass());      // Simplify cfg by copying code
   PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
   PM.add(createCFGSimplificationPass());    // Merge & remove BBs
   PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas
   PM.add(createInstructionCombiningPass()); // Combine silly seq's
   PM.add(createCondPropagationPass());      // Propagate conditionals

   PM.add(createTailCallEliminationPass());  // Eliminate tail calls
   PM.add(createCFGSimplificationPass());    // Merge & remove BBs
   PM.add(createReassociatePass());          // Reassociate expressions
   PM.add(createLoopRotatePass());
   PM.add(createLICMPass());                 // Hoist loop invariants
   PM.add(createLoopUnswitchPass());         // Unswitch loops.
   PM.add(createLoopIndexSplitPass());       // Index split loops.
   PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc
   PM.add(createIndVarSimplifyPass());       // Canonicalize indvars
   PM.add(createLoopUnrollPass());           // Unroll small loops
   PM.add(createInstructionCombiningPass()); // Clean up after the unroller
   PM.add(createGVNPass());                  // Remove redundancies
   PM.add(createSCCPPass());                 // Constant prop with SCCP

   // Run instcombine after redundancy elimination to exploit opportunities
   // opened up by them.
   PM.add(createInstructionCombiningPass());
   PM.add(createCondPropagationPass());      // Propagate conditionals

   PM.add(createDeadStoreEliminationPass()); // Delete dead stores
   PM.add(createAggressiveDCEPass());        // SSA based 'Aggressive DCE'
   PM.add(createCFGSimplificationPass());    // Merge & remove BBs
   PM.add(createSimplifyLibCallsPass());     // Library Call Optimizations
   PM.add(createDeadTypeEliminationPass());  // Eliminate dead types
   PM.add(createConstantMergePass());        // Merge dup global constants
}

/* Appends a copy of *interp to prog's interpolator table.
 * The table has a fixed size, so an entry that would not fit is reported
 * and dropped instead of being written past the end of the array. */
static inline void
add_interpolator(struct gallivm_prog *prog,
                 struct gallivm_interpolate *interp)
{
   const int capacity = static_cast<int>(sizeof(prog->interpolators) /
                                         sizeof(prog->interpolators[0]));

   if (prog->num_interp >= capacity) {
      fprintf(stderr, "ERROR: too many interpolators (max %d)\n", capacity);
      assert(!"interpolator table overflow");
      return;
   }

   prog->interpolators[prog->num_interp] = *interp;
   ++prog->num_interp;
}

/* Handles a TGSI declaration token.  Only input declarations have an
 * effect: every declared (attribute, channel) pair is recorded in prog's
 * interpolator table with the declaration's interpolation mode.
 * module, storage and fd are currently unused. */
static void
translate_declaration(struct gallivm_prog *prog,
                      llvm::Module *module,
                      Storage *storage,
                      struct tgsi_full_declaration *decl,
                      struct tgsi_full_declaration *fd)
{
   if (decl->Declaration.File == TGSI_FILE_INPUT) {
      unsigned first, last, mask;
      uint interp_method;

      assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE);

      first = decl->u.DeclarationRange.First;
      last = decl->u.DeclarationRange.Last;
      mask = decl->Declaration.UsageMask;

      /* Do not touch WPOS.xy */
      if (first == 0) {
         mask &= ~TGSI_WRITEMASK_XY;
         if (mask == TGSI_WRITEMASK_NONE) {
            first++;
            if (first > last) {
               return;
            }
         }
      }

      interp_method = decl->Interpolation.Interpolate;

      if (mask == TGSI_WRITEMASK_XYZW) {
         unsigned i, j;

         /* all channels used: emit attribute-major */
         for (i = first; i <= last; i++) {
            for (j = 0; j < NUM_CHANNELS; j++) {
               //interp( mach, i, j );
               struct gallivm_interpolate interp;
               interp.type = interp_method;
               interp.attrib = i;
               interp.chan = j;
               add_interpolator(prog, &interp);
            }
         }
      } else {
         unsigned i, j;

         /* partial mask: emit channel-major, enabled channels only */
         for( j = 0; j < NUM_CHANNELS; j++ ) {
            if( mask & (1 << j) ) {
               for( i = first; i <= last; i++ ) {
                  struct gallivm_interpolate interp;
                  interp.type = interp_method;
                  interp.attrib = i;
                  interp.chan = j;
                  add_interpolator(prog, &interp);
               }
            }
         }
      }
   }
}

/* Handles a TGSI immediate token: copies its float components into a
 * 4-element vector and registers that vector with the storage.
 * Components the immediate does not provide are passed as 0.0f. */
static void
translate_immediate(Storage *storage,
                    struct tgsi_full_immediate *imm)
{
   float vec[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
   /* Immediate.Size counts the header token too, hence the -1 */
   int count = static_cast<int>(imm->Immediate.Size) - 1;

   if (count > 4) {
      /* never write past the end of vec */
      assert(!"immediate with more than 4 components");
      count = 4;
   }

   for (int i = 0; i < count; ++i) {
      switch( imm->Immediate.DataType ) {
      case TGSI_IMM_FLOAT32:
         vec[i] = imm->u.ImmediateFloat32[i].Float;
         break;
      default:
         assert( 0 );
      }
   }
   storage->addImmediate(vec);
}

/* Applies the extended swizzle of src to val.  The four per-channel
 * swizzle selectors are packed into one decimal number (x*1000 + y*100 +
 * z*10 + w); the value is returned untouched for the identity swizzle. */
static inline llvm::Value *
swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src,
              Storage *storage)
{
   int swizzle = 0;
   int start = 1000;
   const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 +
                          TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W;
   for (int k = 0; k < 4; ++k) {
      swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start;
      start /= 10;
   }
   if (swizzle != NO_SWIZZLE) {
      /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/
      val = storage->shuffleVector(val, swizzle);
   }
   return val;
}

/* Translates one TGSI instruction: loads and swizzles the source operands,
 * emits the operation through instr and stores the result to every
 * destination register.  Control-flow opcodes only update the builder
 * state and return early.  module and fi are currently unused; instno is
 * the index of the instruction and is used to name subroutines. */
static void
translate_instruction(llvm::Module *module,
                      Storage *storage,
                      Instructions *instr,
                      struct tgsi_full_instruction *inst,
                      struct tgsi_full_instruction *fi,
                      unsigned instno)
{
   llvm::Value *inputs[4];
   inputs[0] = 0;
   inputs[1] = 0;
   inputs[2] = 0;
   inputs[3] = 0;

   const int num_src = inst->Instruction.NumSrcRegs;
   if (num_src > 4) {
      /* inputs[] only has room for four operands */
      fprintf(stderr, "ERROR: too many source registers %d\n", num_src);
      assert(!"too many source registers");
      return;
   }

   for (int i = 0; i < num_src; ++i) {
      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
      llvm::Value *val = 0;
      llvm::Value *indIdx = 0;

      if (src->SrcRegister.Indirect) {
         indIdx = storage->addrElement(src->SrcRegisterInd.Index);
         indIdx = storage->extractIndex(indIdx);
      }
      if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
         val = storage->constElement(src->SrcRegister.Index, indIdx);
      } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
         val = storage->inputElement(src->SrcRegister.Index, indIdx);
      } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
         val = storage->tempElement(src->SrcRegister.Index);
      } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
         val = storage->outputElement(src->SrcRegister.Index, indIdx);
      } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
         val = storage->immediateElement(src->SrcRegister.Index);
      } else {
         fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
         return;
      }

      inputs[i] = swizzleVector(val, src, storage);
   }

   /*if (inputs[0])
     instr->printVector(inputs[0]);
     if (inputs[1])
     instr->printVector(inputs[1]);*/
   llvm::Value *out = 0;
   /* Cases that only "break" are opcodes without an implementation yet;
    * they leave out == 0 and are reported as unsupported below. */
   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ARL: {
      out = instr->arl(inputs[0]);
   }
      break;
   case TGSI_OPCODE_MOV: {
      out = inputs[0];
   }
      break;
   case TGSI_OPCODE_LIT: {
      out = instr->lit(inputs[0]);
   }
      break;
   case TGSI_OPCODE_RCP: {
      out = instr->rcp(inputs[0]);
   }
      break;
   case TGSI_OPCODE_RSQ: {
      out = instr->rsq(inputs[0]);
   }
      break;
   case TGSI_OPCODE_EXP:
   case TGSI_OPCODE_LOG:
      break;
   case TGSI_OPCODE_MUL: {
      out = instr->mul(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_ADD: {
      out = instr->add(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_DP3: {
      out = instr->dp3(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_DP4: {
      out = instr->dp4(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_DST: {
      out = instr->dst(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_MIN: {
      out = instr->min(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_MAX: {
      out = instr->max(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_SLT: {
      out = instr->slt(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_SGE: {
      out = instr->sge(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_MAD: {
      out = instr->madd(inputs[0], inputs[1], inputs[2]);
   }
      break;
   case TGSI_OPCODE_SUB: {
      out = instr->sub(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_LERP: {
      out = instr->lerp(inputs[0], inputs[1], inputs[2]);
   }
      break;
   case TGSI_OPCODE_CND:
   case TGSI_OPCODE_CND0:
   case TGSI_OPCODE_DOT2ADD:
   case TGSI_OPCODE_INDEX:
   case TGSI_OPCODE_NEGATE:
      break;
   case TGSI_OPCODE_FRAC: {
      out = instr->frc(inputs[0]);
   }
      break;
   case TGSI_OPCODE_CLAMP:
      break;
   case TGSI_OPCODE_FLOOR: {
      out = instr->floor(inputs[0]);
   }
      break;
   case TGSI_OPCODE_ROUND:
      break;
   case TGSI_OPCODE_EXPBASE2: {
      out = instr->ex2(inputs[0]);
   }
      break;
   case TGSI_OPCODE_LOGBASE2: {
      out = instr->lg2(inputs[0]);
   }
      break;
   case TGSI_OPCODE_POWER: {
      out = instr->pow(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_CROSSPRODUCT: {
      out = instr->cross(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_MULTIPLYMATRIX:
      break;
   case TGSI_OPCODE_ABS: {
      out = instr->abs(inputs[0]);
   }
      break;
   case TGSI_OPCODE_RCC:
      break;
   case TGSI_OPCODE_DPH: {
      out = instr->dph(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_COS: {
      out = instr->cos(inputs[0]);
   }
      break;
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      break;
   case TGSI_OPCODE_KILP: {
      out = instr->kilp(inputs[0]);
      storage->setKilElement(out);
      return;
   }
      break;
   case TGSI_OPCODE_PK2H:
   case TGSI_OPCODE_PK2US:
   case TGSI_OPCODE_PK4B:
   case TGSI_OPCODE_PK4UB:
   case TGSI_OPCODE_RFL:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SFL:
      break;
   case TGSI_OPCODE_SGT: {
      out = instr->sgt(inputs[0], inputs[1]);
   }
      break;
   case TGSI_OPCODE_SIN: {
      out = instr->sin(inputs[0]);
   }
      break;
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_STR:
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_UP2H:
   case TGSI_OPCODE_UP2US:
   case TGSI_OPCODE_UP4B:
   case TGSI_OPCODE_UP4UB:
   case TGSI_OPCODE_X2D:
   case TGSI_OPCODE_ARA:
   case TGSI_OPCODE_ARR:
   case TGSI_OPCODE_BRA:
      break;
   case TGSI_OPCODE_CAL: {
      instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr());
      return;
   }
      break;
   case TGSI_OPCODE_RET: {
      instr->end();
      return;
   }
      break;
   case TGSI_OPCODE_SSG:
      break;
   case TGSI_OPCODE_CMP: {
      out = instr->cmp(inputs[0], inputs[1], inputs[2]);
   }
      break;
   case TGSI_OPCODE_SCS: {
      out = instr->scs(inputs[0]);
   }
      break;
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_NRM:
   case TGSI_OPCODE_DIV:
   case TGSI_OPCODE_DP2:
   case TGSI_OPCODE_TXL:
      break;
   case TGSI_OPCODE_BRK: {
      instr->brk();
      return;
   }
      break;
   case TGSI_OPCODE_IF: {
      instr->ifop(inputs[0]);
      storage->setCurrentBlock(instr->currentBlock());
      return;  //just update the state
   }
      break;
   case TGSI_OPCODE_LOOP:
   case TGSI_OPCODE_REP:
      break;
   case TGSI_OPCODE_ELSE: {
      instr->elseop();
      storage->setCurrentBlock(instr->currentBlock());
      return; //only state update
   }
      break;
   case TGSI_OPCODE_ENDIF: {
      instr->endif();
      storage->setCurrentBlock(instr->currentBlock());
      return; //just update the state
   }
      break;
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_ENDREP:
   case TGSI_OPCODE_PUSHA:
   case TGSI_OPCODE_POPA:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_NOT:
      break;
   case TGSI_OPCODE_TRUNC: {
      out = instr->trunc(inputs[0]);
   }
      break;
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_SHR:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_MOD:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_CONT:
   case TGSI_OPCODE_EMIT:
   case TGSI_OPCODE_ENDPRIM:
      break;
   case TGSI_OPCODE_BGNLOOP2: {
      instr->beginLoop();
      storage->setCurrentBlock(instr->currentBlock());
      return;
   }
      break;
   case TGSI_OPCODE_BGNSUB: {
      instr->bgnSub(instno);
      storage->setCurrentBlock(instr->currentBlock());
      storage->pushTemps();
      return;
   }
      break;
   case TGSI_OPCODE_ENDLOOP2: {
      instr->endLoop();
      storage->setCurrentBlock(instr->currentBlock());
      return;
   }
      break;
   case TGSI_OPCODE_ENDSUB: {
      instr->endSub();
      storage->setCurrentBlock(instr->currentBlock());
      storage->popArguments();
      storage->popTemps();
      return;
   }
      break;
   case TGSI_OPCODE_NOISE1:
   case TGSI_OPCODE_NOISE2:
   case TGSI_OPCODE_NOISE3:
   case TGSI_OPCODE_NOISE4:
   case TGSI_OPCODE_NOP:
   case TGSI_OPCODE_TEXBEM:
   case TGSI_OPCODE_TEXBEML:
   case TGSI_OPCODE_TEXREG2AR:
   case TGSI_OPCODE_TEXM3X2PAD:
   case TGSI_OPCODE_TEXM3X2TEX:
   case TGSI_OPCODE_TEXM3X3PAD:
   case TGSI_OPCODE_TEXM3X3TEX:
   case TGSI_OPCODE_TEXM3X3SPEC:
   case TGSI_OPCODE_TEXM3X3VSPEC:
   case TGSI_OPCODE_TEXREG2GB:
   case TGSI_OPCODE_TEXREG2RGB:
   case TGSI_OPCODE_TEXDP3TEX:
   case TGSI_OPCODE_TEXDP3:
   case TGSI_OPCODE_TEXM3X3:
   case TGSI_OPCODE_TEXM3X2DEPTH:
   case TGSI_OPCODE_TEXDEPTH:
   case TGSI_OPCODE_BEM:
   case TGSI_OPCODE_M4X3:
   case TGSI_OPCODE_M3X4:
   case TGSI_OPCODE_M3X3:
   case TGSI_OPCODE_M3X2:
   case TGSI_OPCODE_NRM4:
   case TGSI_OPCODE_CALLNZ:
   case TGSI_OPCODE_IFC:
   case TGSI_OPCODE_BREAKC:
   case TGSI_OPCODE_KIL:
      break;
   case TGSI_OPCODE_END:
      instr->end();
      return;
      break;
   default:
      fprintf(stderr, "ERROR: Unknown opcode %d\n",
              inst->Instruction.Opcode);
      assert(0);
      break;
   }

   if (!out) {
      fprintf(stderr, "ERROR: unsupported opcode %d\n",
              inst->Instruction.Opcode);
      assert(!"Unsupported opcode");
      /* with asserts compiled out, don't store a null value below */
      return;
   }

   /* # not sure if we need this */
   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;
   case TGSI_SAT_ZERO_ONE:
      /*TXT( "_SAT" );*/
      break;
   case TGSI_SAT_MINUS_PLUS_ONE:
      /*TXT( "_SAT[-1,1]" );*/
      break;
   default:
      assert( 0 );
   }

   /* store results  */
   for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];

      if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
         storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
      } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
         storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
      } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
         storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
      } else {
         fprintf(stderr, "ERROR: unsupported LLVM destination!");
         assert(!"wrong destination");
      }
   }
}

/* Builds the LLVM module for prog from the TGSI token stream.  Starts from
 * the base shader module, renames its "execute_shader" function to
 * "vs_shader<id>" or "fs_shader<id>", then translates every token into it.
 * Stores the number of constants used in prog->num_consts and returns the
 * module. */
static llvm::Module *
tgsi_to_llvm(struct gallivm_prog *prog, const struct tgsi_token *tokens)
{
   llvm::Module *mod = createBaseShader();
   struct tgsi_parse_context parse;
   struct tgsi_full_instruction fi;
   struct tgsi_full_declaration fd;
   unsigned instno = 0;
   Function* shader = mod->getFunction("execute_shader");
   std::ostringstream stream;
   if (prog->type == GALLIVM_VS) {
      stream << "vs_shader";
   } else {
      stream << "fs_shader";
   }
   stream << prog->id;
   std::string func_name = stream.str();
   shader->setName(func_name.c_str());

   Function::arg_iterator args = shader->arg_begin();
   Value *ptr_INPUT = args++;
   ptr_INPUT->setName("input");

   BasicBlock *label_entry = new BasicBlock("entry", shader, 0);

   tgsi_parse_init(&parse, tokens);

   fi = tgsi_default_full_instruction();
   fd = tgsi_default_full_declaration();
   Storage storage(label_entry, ptr_INPUT);
   Instructions instr(mod, shader, label_entry, &storage);
   while(!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         translate_declaration(prog, mod, &storage,
                               &parse.FullToken.FullDeclaration,
                               &fd);
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         translate_immediate(&storage,
                             &parse.FullToken.FullImmediate);
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         translate_instruction(mod, &storage, &instr,
                               &parse.FullToken.FullInstruction,
                               &fi, instno);
         ++instno;
         break;

      default:
         assert(0);
      }
   }

   tgsi_parse_free(&parse);

   prog->num_consts = storage.numConsts();
   return mod;
}

/*!
  Translates the TGSI tokens into LLVM format. Translated representation
  is stored in the gallivm_prog and returned.
  After calling this function the gallivm_prog can either be used with a custom
  code generator to generate machine code for the GPU which the code generator
  addresses or it can be jit compiled with gallivm_cpu_jit_compile and executed
  with gallivm_prog_exec to run the module on the CPU.

  Returns 0 if the program structure could not be allocated.
 */
struct gallivm_prog *
gallivm_from_tgsi(const struct tgsi_token *tokens, enum gallivm_shader_type type)
{
   /*
    * NOTE(review): the statements between the "Creating llvm from" message
    * and the id assignment were lost in this copy of the file.  They are
    * reconstructed from what the rest of the file requires: the program is
    * released with free() and relies on num_interp starting at zero, hence
    * calloc.  Verify the GLOBAL_ID increment against upstream.
    */
   std::cout << "Creating llvm from: " <<std::endl;
   ++GLOBAL_ID;
   struct gallivm_prog *gallivm =
      (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
   if (!gallivm) {
      fprintf(stderr, "ERROR: couldn't allocate a gallivm program\n");
      return 0;
   }
   gallivm->id = GLOBAL_ID;
   gallivm->type = type;
   tgsi_dump(tokens, 0);

   llvm::Module *mod = tgsi_to_llvm(gallivm, tokens);
   gallivm->module = mod;
   gallivm_prog_dump(gallivm, 0);

   /* Run optimization passes over it */
   PassManager passes;
   passes.add(new TargetData(mod));
   AddStandardCompilePasses(passes);
   passes.run(*mod);

   gallivm_prog_dump(gallivm, 0);

   return gallivm;
}

/*!
  Destroys the LLVM module held by the program and releases the program
  itself.  Passing a null program is a no-op.
 */
void gallivm_prog_delete(struct gallivm_prog *prog)
{
   if (!prog)
      return;

   llvm::Module *mod = static_cast<llvm::Module *>(prog->module);
   delete mod;
   prog->module = 0;
   prog->function = 0;
   free(prog);
}

/* Signature of the JIT-compiled vertex shader entry point. */
typedef void (*vertex_shader_runner)(float (*ainputs)[PIPE_MAX_SHADER_INPUTS][4],
                                     float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
                                     float (*aconsts)[4],
                                     int num_vertices,
                                     int num_inputs,
                                     int num_attribs,
                                     int num_consts);

/*!
  This function is used to execute the gallivm_prog in software. Before calling
  this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile
  function.
 */
int gallivm_prog_exec(struct gallivm_prog *prog,
                      float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
                      float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
                      float (*consts)[4],
                      int num_vertices,
                      int num_inputs,
                      int num_attribs)
{
   vertex_shader_runner runner =
      reinterpret_cast<vertex_shader_runner>(prog->function);
   assert(runner);
   runner(inputs, dests, consts, num_vertices, num_inputs,
          num_attribs, prog->num_consts);

   return 0;
}

/* Writes the constant coefficient a0 of (attrib, chan) to all QUAD_SIZE
 * entries of inputs. */
static inline void
constant_interpolation(float (*inputs)[16][4],
                       const struct tgsi_interp_coef *coefs,
                       unsigned attrib,
                       unsigned chan)
{
   unsigned i;

   for (i = 0; i < QUAD_SIZE; ++i) {
      inputs[i][attrib][chan] = coefs[attrib].a0[chan];
   }
}

/* Evaluates a0 + dadx * x + dady * y for (attrib, chan) for every entry of
 * inputs, where x and y are read from attribute 0 of that entry. */
static inline void
linear_interpolation(float (*inputs)[16][4],
                     const struct tgsi_interp_coef *coefs,
                     unsigned attrib,
                     unsigned chan)
{
   unsigned i;

   for( i = 0; i < QUAD_SIZE; i++ ) {
      const float x = inputs[i][0][0];
      const float y = inputs[i][0][1];

      inputs[i][attrib][chan] = coefs[attrib].a0[chan] +
                                coefs[attrib].dadx[chan] * x +
                                coefs[attrib].dady[chan] * y;
   }
}

/* Same as linear_interpolation, with the result additionally divided by
 * component 3 of attribute 0. */
static inline void
perspective_interpolation(float (*inputs)[16][4],
                          const struct tgsi_interp_coef *coefs,
                          unsigned attrib,
                          unsigned chan )
{
   unsigned i;

   for( i = 0; i < QUAD_SIZE; i++ ) {
      const float x = inputs[i][0][0];
      const float y = inputs[i][0][1];
      /* check the divisor before it is used */
      assert(inputs[i][0][3] != 0.0);
      /* WPOS.w here is really 1/w */
      const float w = 1.0f / inputs[i][0][3];

      inputs[i][attrib][chan] = (coefs[attrib].a0[chan] +
                                 coefs[attrib].dadx[chan] * x +
                                 coefs[attrib].dady[chan] * y) * w;
   }
}

/* Signature of the JIT-compiled fragment shader entry point. */
typedef int (*fragment_shader_runner)(float x, float y,
                                      float (*dests)[16][4],
                                      float (*inputs)[16][4],
                                      int num_attribs,
                                      float (*consts)[4], int num_consts,
                                      struct tgsi_sampler *samplers);

/*!
  Runs the JIT-compiled fragment shader of the program and returns the
  value the shader entry point returned.  The program has to be JIT
  compiled first.
 */
int gallivm_fragment_shader_exec(struct gallivm_prog *prog,
                                 float fx, float fy,
                                 float (*dests)[16][4],
                                 float (*inputs)[16][4],
                                 float (*consts)[4],
                                 struct tgsi_sampler *samplers)
{
   fragment_shader_runner runner =
      reinterpret_cast<fragment_shader_runner>(prog->function);
   assert(runner);

   return runner(fx, fy, dests, inputs, prog->num_interp,
                 consts, prog->num_consts,
                 samplers);
}

/*!
  Dumps the LLVM representation of the program.  With a file_prefix the
  module is written to "<file_prefix><id>.ll", otherwise the shader
  functions are printed to standard output.
 */
void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix)
{
   llvm::Module *mod;
   if (!prog || !prog->module)
      return;

   mod = static_cast<llvm::Module *>(prog->module);

   /*
    * NOTE(review): large parts of this function body were lost in this copy
    * of the file (everything between '<' and '>' characters).  The file
    * output, the function filter and the "End shader" line below are
    * reconstructed; verify them against the upstream revision.
    */
   if (file_prefix) {
      std::ostringstream stream;
      stream << file_prefix;
      stream << prog->id;
      stream << ".ll";
      std::string name = stream.str();
      std::ofstream out(name.c_str());
      if (!out) {
         std::cerr<<"Can't open file : "<<stream.str()<<std::endl;
         return;
      }
      out << (*mod);
      out.close();
   } else {
      const llvm::Module::FunctionListType &funcs = mod->getFunctionList();
      llvm::Module::FunctionListType::const_iterator itr;
      std::cout<<"; ---------- Start shader "<<prog->id<<std::endl;
      for (itr = funcs.begin(); itr != funcs.end(); ++itr) {
         const llvm::Function &func = (*itr);
         std::string name = func.getName();
         const llvm::Function *found = 0;
         if (name.find("vs_shader") != std::string::npos ||
             name.find("fs_shader") != std::string::npos ||
             name.find("function") != std::string::npos)
            found = &func;
         if (found) {
            std::cout<<*found<<std::endl;
         }
      }
      std::cout<<"; ---------- End shader "<<prog->id<<std::endl;
   }
}

/* The cpu engine returned by gallivm_global_cpu_engine().
 * NOTE(review): declaration reconstructed, the original line was lost. */
static struct gallivm_cpu_engine *CPU = 0;

/* Looks up the entry point that matches the program's shader type in its
 * module.  Returns whatever getFunction returns, or 0 for an unknown
 * shader type.
 * NOTE(review): the function header was lost in this copy of the file and
 * is reconstructed from the call sites. */
static inline llvm::Function *func_for_shader(struct gallivm_prog *prog)
{
   llvm::Module *mod = prog->module;
   llvm::Function *func = 0;

   switch (prog->type) {
   case GALLIVM_VS:
      func = mod->getFunction("run_vertex_shader");
      break;
   case GALLIVM_FS:
      func = mod->getFunction("run_fragment_shader");
      break;
   default:
      assert(!"Unknown shader type!");
      break;
   }
   return func;
}

/*!
  This function creates a CPU based execution engine for the given gallivm_prog.
  gallivm_cpu_engine should be used as a singleton throughout the library. Before
  executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile.
  The gallivm_prog instance which is being passed to the constructor is being
  automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile
  with it again.

  Returns 0 if the engine could not be created.
 */
struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog)
{
   struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *)
                                    calloc(1, sizeof(struct gallivm_cpu_engine));
   if (!cpu) {
      fprintf(stderr, "ERROR: couldn't allocate the cpu engine\n");
      return 0;
   }

   llvm::Module *mod = static_cast<llvm::Module *>(prog->module);
   llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
   llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false);
   if (!ee) {
      /* NOTE(review): mp is intentionally not deleted here; whether its
       * destructor would also destroy prog's module needs confirming. */
      fprintf(stderr, "ERROR: couldn't create the execution engine\n");
      free(cpu);
      return 0;
   }
   ee->DisableLazyCompilation();
   cpu->engine = ee;

   llvm::Function *func = func_for_shader(prog);

   /* don't hand a missing entry point to the JIT */
   prog->function = func ? ee->getPointerToFunction(func) : 0;
   CPU = cpu;
   return cpu;
}

/*!
  This function JIT compiles the given gallivm_prog with the given cpu based execution engine.
  The reference to the generated machine code entry point will be stored
  in the gallivm_prog program. After executing this function one can call gallivm_prog_exec
  in order to execute the gallivm_prog on the CPU.
 */
void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog)
{
   llvm::Module *mod = static_cast<llvm::Module *>(prog->module);
   llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
   llvm::ExecutionEngine *ee = cpu->engine;
   assert(ee);
   ee->DisableLazyCompilation();
   ee->addModuleProvider(mp);

   llvm::Function *func = func_for_shader(prog);
   /* don't hand a missing entry point to the JIT */
   prog->function = func ? ee->getPointerToFunction(func) : 0;
}

/*!
  Releases the engine wrapper.  If it is the engine returned by
  gallivm_global_cpu_engine, that global is reset so it can't be used
  after the memory has been freed.
 */
void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu)
{
   if (cpu == CPU)
      CPU = 0;

   /* NOTE(review): cpu->engine is not destroyed here.  The engine was given
    * module providers wrapping modules that gallivm_prog_delete deletes as
    * well, so ownership has to be sorted out before adding a delete. */
   free(cpu);
}

/*!
  Returns the engine created by the last successful call to
  gallivm_cpu_engine_create, or 0 if there is none.
 */
struct gallivm_cpu_engine * gallivm_global_cpu_engine()
{
   return CPU;
}

/*!
  Fills in the shader inputs by running every interpolator recorded for
  the program, in the order they were declared, with the given
  coefficients.
 */
void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
                                     float (*inputs)[16][4],
                                     const struct tgsi_interp_coef *coef)
{
   for (int i = 0; i < prog->num_interp; ++i) {
      const gallivm_interpolate &interp = prog->interpolators[i];
      switch (interp.type) {
      case TGSI_INTERPOLATE_CONSTANT:
         constant_interpolation(inputs, coef, interp.attrib, interp.chan);
         break;

      case TGSI_INTERPOLATE_LINEAR:
         linear_interpolation(inputs, coef, interp.attrib, interp.chan);
         break;

      case TGSI_INTERPOLATE_PERSPECTIVE:
         perspective_interpolation(inputs, coef, interp.attrib, interp.chan);
         break;

      default:
         assert( 0 );
      }
   }
}

#endif /* MESA_LLVM */