summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mesa/pipe/draw/draw_vertex_shader_llvm.c5
-rw-r--r--src/mesa/pipe/llvm/llvm_builtins.c115
-rw-r--r--src/mesa/pipe/llvm/llvmtgsi.cpp117
-rw-r--r--src/mesa/pipe/llvm/llvmtgsi.h4
4 files changed, 194 insertions, 47 deletions
diff --git a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
index b38498efc5..fd49901051 100644
--- a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
+++ b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
@@ -112,7 +112,7 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
float (*consts)[4] = (float (*)[4]) draw->mapped_constants;
struct ga_llvm_prog *prog = draw->vertex_shader->state->llvm_prog;
- fprintf(stderr, "XX q(%d) ", draw->vs.queue_nr);
+ fprintf(stderr, "--- XX q(%d) ", draw->vs.queue_nr);
/* fetch the inputs */
for (i = 0; i < draw->vs.queue_nr; ++i) {
@@ -123,7 +123,8 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
/* batch execute the shaders on all the vertices */
ga_llvm_prog_exec(prog, inputs, dests, consts,
- draw->vs.queue_nr);
+ draw->vs.queue_nr,
+ draw->vertex_info.num_attribs);
draw->vs.queue_nr = 0;
}
diff --git a/src/mesa/pipe/llvm/llvm_builtins.c b/src/mesa/pipe/llvm/llvm_builtins.c
index 0f0efeb303..c7a9ea0d5a 100644
--- a/src/mesa/pipe/llvm/llvm_builtins.c
+++ b/src/mesa/pipe/llvm/llvm_builtins.c
@@ -1,20 +1,29 @@
+/* clang --emit-llvm llvm_builtins.c |llvm-as |opt -std-compile-opts |llvm-dis */
+/* clang --emit-llvm llvm_builtins.c |llvm-as |opt -std-compile-opts |llvm2cpp -for=Shader -gen-module -funcname=createBaseShader */
+typedef __attribute__(( ocu_vector_type(4) )) float float4;
+#if 0
+//clang doesn't support "struct->member" notation yet
+struct vertex_header {
+ unsigned clipmask:12;
+ unsigned edgeflag:1;
+ unsigned pad:3;
+ unsigned vertex_id:16;
-inline float4 compute_clip(float4 vec, float4 scale, float4 trans)
-{
- return vec*scale + trans;
-}
+ float clip[4];
+ float data[][4];
+};
inline float
-dot4(const float4 a, const float4 b)
+dot4(float4 a, float4 b)
{
float4 c = a*b;
return c.x + c.y + c.z + c.w;
}
inline unsigned
-compute_clipmask(float4 clip, const float4 (*plane), unsigned nr)
+compute_clipmask(float4 clip, float4 (*plane), unsigned nr)
{
unsigned mask = 0;
unsigned i;
@@ -29,7 +38,8 @@ compute_clipmask(float4 clip, const float4 (*plane), unsigned nr)
inline void collect_results(float4 *results, struct vertex_header *vOut,
float4 *planes, int nr_planes,
- float4 scale, float4 trans)
+ float4 scale, float4 trans,
+ int num_attribs)
{
/* store results */
unsigned slot;
@@ -38,13 +48,14 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
/* Handle attr[0] (position) specially:
*/
float4 res0 = results[0];
- x = vOut->clip[0] = clip.x;
- y = vOut->clip[1] = clip.y;
- z = vOut->clip[2] = clip.z;
- w = vOut->clip[3] = clip.w;
+ float *clip = vOut->clip;
+ x = clip[0] = res0.x;
+ y = clip[1] = res0.y;
+ z = clip[2] = res0.z;
+ w = clip[3] = res0.w;
- vOut[i]->clipmask = compute_clipmask(res0, planes, nr_planes);
- vOut[i]->edgeflag = 1;
+ vOut->clipmask = compute_clipmask(res0, planes, nr_planes);
+ vOut->edgeflag = 1;
/* divide by w */
w = 1.0f / w;
@@ -54,10 +65,10 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
res0.x = x; res0.y = y; res0.z = z; res0.w = 1;
/* Viewport mapping */
- res = res * scale + trans;
- vOut->data[0][0] = res.x;
- vOut->data[0][1] = res.y;
- vOut->data[0][2] = res.z;
+ res0 = res0 * scale + trans;
+ vOut->data[0][0] = res0.x;
+ vOut->data[0][1] = res0.y;
+ vOut->data[0][2] = res0.z;
vOut->data[0][3] = w;
/* Remaining attributes are packed into sequential post-transform
@@ -65,7 +76,7 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
* Skip 0 since we just did it above.
* Subtract two because of the VERTEX_HEADER, CLIP_POS attribs.
*/
- for (slot = 1; slot < draw->vertex_info.num_attribs - 2; slot++) {
+ for (slot = 1; slot < num_attribs - 2; slot++) {
float4 vec = results[slot];
vOut->data[slot][0] = vec.x;
vOut->data[slot][1] = vec.y;
@@ -79,12 +90,68 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
vOut->data[slot][3]);
}
}
+#endif
-void run_vertex_shader(float ainputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4],
- struct vertex_header *dests[VS_QUEUE_LENGTH],
- float *aconsts[4]
- int count)
+/*
+ * Repack plain float vertex inputs into float4 vectors.
+ *
+ * For each of the first `count` vertices, the first `num_attribs`
+ * attributes of ainputs[i] are copied component by component
+ * (x, y, z, w) into res[i].
+ *
+ * NOTE(review): neither count nor num_attribs is range-checked here; each
+ * row holds 32 attributes, so callers presumably keep num_attribs within
+ * that limit - confirm against the caller.
+ */
+void from_array(float4 (*res)[32], float (*ainputs)[32][4],
+ int count, int num_attribs)
{
- float4 inputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS];
- float4 *consts;
+ for (int i = 0; i < count; ++i) {
+ for (int j = 0; j < num_attribs; ++j) {
+ float4 vec;
+ vec.x = ainputs[i][j][0];
+ vec.y = ainputs[i][j][1];
+ vec.z = ainputs[i][j][2];
+ vec.w = ainputs[i][j][3];
+ res[i][j] = vec;
+ }
+ }
+}
+
+/*
+ * Pack `count` rows of four floats from ainputs into float4 vectors,
+ * writing row n to res[n].
+ */
+void from_consts(float4 *res, float (*ainputs)[4],
+                 int count)
+{
+   int row = 0;
+
+   while (row < count) {
+      float4 packed;
+
+      packed.x = ainputs[row][0];
+      packed.y = ainputs[row][1];
+      packed.z = ainputs[row][2];
+      packed.w = ainputs[row][3];
+      res[row] = packed;
+      ++row;
+   }
+}
+
+/*
+ * Unpack the first `num_attribs` float4 vectors of `in` into rows of four
+ * floats in `dests` (x, y, z, w order).
+ */
+void to_array(float (*dests)[4], float4 *in, int num_attribs)
+{
+   for (int slot = 0; slot < num_attribs; ++slot) {
+      float4 vec = in[slot];
+
+      dests[slot][0] = vec.x;
+      dests[slot][1] = vec.y;
+      dests[slot][2] = vec.z;
+      dests[slot][3] = vec.w;
+   }
+}
+
+/* Shader body invoked once per vertex by run_vertex_shader(). Only this
+ * declaration appears here; no definition is visible in this file. */
+extern void execute_shader(float4 *dests, float4 *inputs,
+ float4 *consts);
+
+/*
+ * Run the shader over a batch of vertices.
+ *
+ * ainputs     - per-vertex input attributes as plain floats
+ * dests       - per-vertex output attributes, written as plain floats
+ * aconsts     - constants as rows of four floats; 32 rows are always read
+ * count       - number of vertices to process
+ * num_attribs - number of attributes converted per vertex, in and out
+ *
+ * Inputs and constants are repacked into float4 vectors, execute_shader()
+ * is called once per vertex, and each vertex's results are unpacked into
+ * dests.
+ *
+ * NOTE(review): inputs and results are automatic arrays of
+ * 16*32*4 x 32 float4 each (about 1 MiB apiece if float4 is 16 bytes);
+ * confirm the stack can hold them. aconsts is assumed to provide at least
+ * 32 rows - verify against the caller.
+ */
+void run_vertex_shader(float (*ainputs)[32][4],
+                       float (*dests)[32][4],
+                       float (*aconsts)[4],
+                       int count,
+                       int num_attribs)
+{
+   float4 inputs[16*32*4][32];
+   float4 consts[32];
+   float4 results[16*32*4][32];
+
+   printf("XXXXXXXXXXX run_vertex_shader\n");
+   from_array(inputs, ainputs, count, num_attribs);
+   from_consts(consts, aconsts, 32);
+   for (int i = 0; i < count; ++i) {
+      float4 *in = inputs[i];
+      float4 *res = results[i];
+
+      /* Execute first so that res holds this vertex's outputs; only then
+       * unpack them. Unpacking before execution would hand the caller
+       * uninitialised data and drop the shader's results. */
+      execute_shader(res, in, consts);
+      to_array(dests[i], res, num_attribs);
+   }
}
diff --git a/src/mesa/pipe/llvm/llvmtgsi.cpp b/src/mesa/pipe/llvm/llvmtgsi.cpp
index 46b7561b5e..1abc148521 100644
--- a/src/mesa/pipe/llvm/llvmtgsi.cpp
+++ b/src/mesa/pipe/llvm/llvmtgsi.cpp
@@ -14,15 +14,88 @@
#include <llvm/DerivedTypes.h>
#include <llvm/Instructions.h>
#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
#include <llvm/ParameterAttributes.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/ExecutionEngine/Interpreter.h>
#include <llvm/ExecutionEngine/GenericValue.h>
#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <iostream>
+using namespace llvm;
+#include "llvm_base_shader.cpp"
+
+
+/* Thin forwarding helper: hands pass P to pass manager PM via PM.add(). */
+static inline void addPass(PassManager &PM, Pass *P)
+{
+   PM.add(P);
+}
+
+/*
+ * Queue the standard compile-time optimization pipeline on PM.
+ *
+ * Passes are added in a fixed order; the strip-debug step and the
+ * "disable inlining" guard present as commented-out code in the original
+ * list are not wired up, so symbols are never stripped here and inlining
+ * is always scheduled.
+ */
+static inline void AddStandardCompilePasses(PassManager &PM)
+{
+   /* Verify that the input is correct before touching it. */
+   PM.add(createVerifierPass());
+
+   /* Lower llvm.setjmp/.longjmp. */
+   PM.add(createLowerSetJmpPass());
+
+   /* Early cleanup and interprocedural simplification. */
+   PM.add(createRaiseAllocationsPass());          /* call %malloc -> malloc inst */
+   PM.add(createCFGSimplificationPass());         /* clean up disgusting code */
+   PM.add(createPromoteMemoryToRegisterPass());   /* kill useless allocas */
+   PM.add(createGlobalOptimizerPass());           /* optimize out global vars */
+   PM.add(createGlobalDCEPass());                 /* remove unused fns and globs */
+   PM.add(createIPConstantPropagationPass());     /* IP constant propagation */
+   PM.add(createDeadArgEliminationPass());        /* dead argument elimination */
+   PM.add(createInstructionCombiningPass());      /* clean up after IPCP & DAE */
+   PM.add(createCFGSimplificationPass());         /* clean up after IPCP & DAE */
+
+   /* Remove dead EH info. */
+   PM.add(createPruneEHPass());
+
+   /* Inlining (unconditional here) and argument scalarization. */
+   PM.add(createFunctionInliningPass());          /* inline small functions */
+   PM.add(createArgumentPromotionPass());         /* scalarize uninlined fn args */
+
+   /* Scalar cleanup. */
+   PM.add(createTailDuplicationPass());           /* simplify cfg by copying code */
+   PM.add(createInstructionCombiningPass());      /* cleanup for scalarrepl */
+   PM.add(createCFGSimplificationPass());         /* merge & remove BBs */
+   PM.add(createScalarReplAggregatesPass());      /* break up aggregate allocas */
+   PM.add(createInstructionCombiningPass());      /* combine silly seq's */
+   PM.add(createCondPropagationPass());           /* propagate conditionals */
+
+   /* Tail calls, reassociation and loop transformations. */
+   PM.add(createTailCallEliminationPass());       /* eliminate tail calls */
+   PM.add(createCFGSimplificationPass());         /* merge & remove BBs */
+   PM.add(createReassociatePass());               /* reassociate expressions */
+   PM.add(createLoopRotatePass());
+   PM.add(createLICMPass());                      /* hoist loop invariants */
+   PM.add(createLoopUnswitchPass());              /* unswitch loops */
+   PM.add(createLoopIndexSplitPass());            /* index split loops */
+   PM.add(createInstructionCombiningPass());      /* clean up after LICM/reassoc */
+   PM.add(createIndVarSimplifyPass());            /* canonicalize indvars */
+   PM.add(createLoopUnrollPass());                /* unroll small loops */
+   PM.add(createInstructionCombiningPass());      /* clean up after the unroller */
+   PM.add(createGVNPass());                       /* remove redundancies */
+   PM.add(createSCCPPass());                      /* constant prop with SCCP */
+
+   /* Run instcombine again after redundancy elimination to exploit the
+    * opportunities it opened up. */
+   PM.add(createInstructionCombiningPass());
+   PM.add(createCondPropagationPass());           /* propagate conditionals */
+
+   /* Final dead-code and module-level cleanup. */
+   PM.add(createDeadStoreEliminationPass());      /* delete dead stores */
+   PM.add(createAggressiveDCEPass());             /* SSA based 'Aggressive DCE' */
+   PM.add(createCFGSimplificationPass());         /* merge & remove BBs */
+   PM.add(createSimplifyLibCallsPass());          /* library call optimizations */
+   PM.add(createDeadTypeEliminationPass());       /* eliminate dead types */
+   PM.add(createConstantMergePass());             /* merge dup global constants */
+}
static void
translate_declaration(llvm::Module *module,
@@ -341,7 +414,7 @@ translate_instruction(llvm::Module *module,
static llvm::Module *
tgsi_to_llvm(const struct tgsi_token *tokens)
{
- llvm::Module *mod = new llvm::Module("tgsi");
+ llvm::Module *mod = createBaseShader();
struct tgsi_parse_context parse;
struct tgsi_full_instruction fi;
struct tgsi_full_declaration fd;
@@ -402,18 +475,33 @@ ga_llvm_from_tgsi(const struct tgsi_token *tokens)
struct ga_llvm_prog *ga_llvm =
(struct ga_llvm_prog *)malloc(sizeof(struct ga_llvm_prog));
llvm::Module *mod = tgsi_to_llvm(tokens);
+
+ /* Run optimization passes over it */
+ PassManager passes;
+ // Add an appropriate TargetData instance for this module...
+ passes.add(new TargetData(mod));
+ AddStandardCompilePasses(passes);
+ std::cout<<"Running optimization passes..."<<std::endl;
+ bool b = passes.run(*mod);
+ std::cout<<"\tModified mod = "<<b<<std::endl;
+
llvm::ExistingModuleProvider *mp =
new llvm::ExistingModuleProvider(mod);
- //llvm::ExecutionEngine *ee =
- // llvm::ExecutionEngine::create(mp, false);
+ llvm::ExecutionEngine *ee =
+ llvm::ExecutionEngine::create(mp, false);
ga_llvm->module = mod;
- ga_llvm->engine = 0;//ee;
+ ga_llvm->engine = ee;
fprintf(stderr, "DUMPX \n");
//tgsi_dump(tokens, TGSI_DUMP_VERBOSE);
tgsi_dump(tokens, 0);
fprintf(stderr, "DUMPEND \n");
+ Function *func = mod->getFunction("run_vertex_shader");
+ std::cout << "run_vertex_shader = "<<func;
+ ga_llvm->function = ee->getPointerToFunctionOrStub(func);
+ std::cout << " -- FUNC is " <<ga_llvm->function;
+
return ga_llvm;
}
@@ -423,6 +511,7 @@ void ga_llvm_prog_delete(struct ga_llvm_prog *prog)
delete mod;
prog->module = 0;
prog->engine = 0;
+ prog->function = 0;
free(prog);
}
@@ -430,24 +519,12 @@ int ga_llvm_prog_exec(struct ga_llvm_prog *prog,
float (*inputs)[32][4],
void *dests[16*32*4],
float (*consts)[4],
- int count)
+ int count,
+ int num_attribs)
{
- //std::cout << "START "<<std::endl;
- llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
- llvm::Function *func = mod->getFunction("main");
- llvm::ExecutionEngine *ee = static_cast<llvm::ExecutionEngine*>(prog->engine);
-
- std::vector<llvm::GenericValue> args(0);
- //args[0] = GenericValue(&st);
- //std::cout << "Mod is "<<*mod;
- //std::cout << "\n\nRunning llvm: " << std::endl;
- if (func) {
- std::cout << "Func is "<<func;
- llvm::GenericValue gv = ee->runFunction(func, args);
- }
+ std::cout << "---- START LLVM Execution "<<std::endl;
-//delete ee;
-//delete mp;
+ std::cout << "---- END LLVM Execution "<<std::endl;
return 0;
}
diff --git a/src/mesa/pipe/llvm/llvmtgsi.h b/src/mesa/pipe/llvm/llvmtgsi.h
index 9fbb0ea8f9..b1b5717f6d 100644
--- a/src/mesa/pipe/llvm/llvmtgsi.h
+++ b/src/mesa/pipe/llvm/llvmtgsi.h
@@ -12,6 +12,7 @@ struct tgsi_sampler;
struct ga_llvm_prog {
void *module;
void *engine;
+ void *function;
};
struct ga_llvm_prog *
ga_llvm_from_tgsi(const struct tgsi_token *tokens);
@@ -22,7 +23,8 @@ int ga_llvm_prog_exec(struct ga_llvm_prog *prog,
float (*inputs)[32][4],
void *dests[16*32*4],
float (*consts)[4],
- int count);
+ int count,
+ int num_attribs);
#if defined __cplusplus
} // extern "C"