summary refs log tree commit diff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author José Fonseca <jfonseca@vmware.com> 2009-08-14 10:03:46 +0100
committer José Fonseca <jfonseca@vmware.com> 2009-08-29 09:21:32 +0100
commit 95f38dd67c7dfeb3172ef9723c6f8e4c33de0754 (patch)
tree 7ed94f811d1860e3f8f693d582c52d23fae62e90 /src/gallium/drivers
parent af608e56ca246232ef11a561040b84801ae3a552 (diff)
llvmpipe: Code generate interpolators.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_tgsi.h 5
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c 98
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_fs_llvm.c 244
3 files changed, 169 insertions, 178 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 020db003c2..eb50462210 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -44,7 +44,10 @@ void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
union lp_type type,
- LLVMValueRef (*inputs)[4],
+ LLVMValueRef *pos,
+ LLVMValueRef a0_ptr,
+ LLVMValueRef dadx_ptr,
+ LLVMValueRef dady_ptr,
LLVMValueRef consts_ptr,
LLVMValueRef (*outputs)[4],
LLVMValueRef samplers_ptr);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index c9143ebfe4..1f489a359b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -69,13 +69,21 @@ struct lp_build_tgsi_soa_context
{
struct lp_build_context base;
- LLVMValueRef (*inputs)[4];
+ LLVMValueRef x, y, w;
+ LLVMValueRef a0_ptr;
+ LLVMValueRef dadx_ptr;
+ LLVMValueRef dady_ptr;
+
LLVMValueRef consts_ptr;
- LLVMValueRef (*outputs)[4];
+ LLVMValueRef (*outputs)[NUM_CHANNELS];
LLVMValueRef samplers_ptr;
- LLVMValueRef immediates[LP_MAX_IMMEDIATES][4];
- LLVMValueRef temps[LP_MAX_TEMPS][4];
+ LLVMValueRef oow;
+
+ LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+ LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
+ LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
/** Coords/texels store */
LLVMValueRef store_ptr;
@@ -1339,48 +1347,70 @@ emit_declaration(
struct lp_build_tgsi_soa_context *bld,
struct tgsi_full_declaration *decl )
{
-#if 0
if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+ LLVMBuilderRef builder = bld->base.builder;
unsigned first, last, mask;
- unsigned i, j;
- LLVMValueRef tmp;
+ unsigned attrib, chan;
first = decl->DeclarationRange.First;
last = decl->DeclarationRange.Last;
mask = decl->Declaration.UsageMask;
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- switch( decl->Declaration.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- bld->inputs[i][j] = bld->interp_coefs[i].a0[j];
- break;
+ for( attrib = first; attrib <= last; attrib++ ) {
+ for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
+ LLVMValueRef input = bld->base.undef;
- case TGSI_INTERPOLATE_LINEAR:
- tmp = bld->interp_coefs[i].a0[j];
- tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j]));
- tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j]));
- bld->inputs[i][j] = tmp;
- break;
+ if( mask & (1 << chan) ) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
+ LLVMValueRef a0;
+ LLVMValueRef dadx;
+ LLVMValueRef dady;
+ char name[32];
+ switch( decl->Declaration.Interpolate ) {
case TGSI_INTERPOLATE_PERSPECTIVE:
- tmp = bld->interp_coefs[i].a0[j];
- tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j]));
- tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j]));
- tmp = lp_build_div(&bld->base, tmp, bld->pos[3]);
- bld->inputs[i][j] = tmp;
+ case TGSI_INTERPOLATE_LINEAR: {
+ LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
+ LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
+ util_snprintf(name, sizeof name, "dadx_%u.%c", attrib, "xyzw"[chan]);
+ dadx = LLVMBuildLoad(builder, dadx_ptr, name);
+ util_snprintf(name, sizeof name, "dady_%u.%c", attrib, "xyzw"[chan]);
+ dady = LLVMBuildLoad(builder, dady_ptr, name);
+ }
+
+ case TGSI_INTERPOLATE_CONSTANT: {
+ LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
+ util_snprintf(name, sizeof name, "a0_%u.%c", attrib, "xyzw"[chan]);
+ a0 = LLVMBuildLoad(builder, a0_ptr, name);
break;
+ }
default:
- assert( 0 );
- break;
+ assert(0);
+ break;
+ }
+
+ input = a0;
+
+ if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
+ input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
+ input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
}
+
+ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
+ if(!bld->oow)
+ bld->oow = lp_build_rcp(&bld->base, bld->w);
+ input = lp_build_mul(&bld->base, input, bld->oow);
+ }
+
+ util_snprintf(name, sizeof name, "input%u.%c", attrib, "xyzw"[chan]);
+ LLVMSetValueName(input, name);
}
+
+ bld->inputs[attrib][chan] = input;
}
}
}
-#endif
}
/**
@@ -1396,7 +1426,10 @@ void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
union lp_type type,
- LLVMValueRef (*inputs)[4],
+ LLVMValueRef *pos,
+ LLVMValueRef a0_ptr,
+ LLVMValueRef dadx_ptr,
+ LLVMValueRef dady_ptr,
LLVMValueRef consts_ptr,
LLVMValueRef (*outputs)[4],
LLVMValueRef samplers_ptr)
@@ -1409,7 +1442,12 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
/* Setup build context */
memset(&bld, 0, sizeof bld);
lp_build_context_init(&bld.base, builder, type);
- bld.inputs = inputs;
+ bld.x = pos[0];
+ bld.y = pos[1];
+ bld.w = pos[3];
+ bld.a0_ptr = a0_ptr;
+ bld.dadx_ptr = dadx_ptr;
+ bld.dady_ptr = dady_ptr;
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
bld.samplers_ptr = samplers_ptr;
diff --git a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c b/src/gallium/drivers/llvmpipe/lp_fs_llvm.c
index 2c1a849785..ef1c8c32c2 100644
--- a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c
+++ b/src/gallium/drivers/llvmpipe/lp_fs_llvm.c
@@ -47,7 +47,10 @@
typedef void
-(*lp_shader_fs_func)(void *inputs,
+(*lp_shader_fs_func)(void *pos,
+ void *a0,
+ void *dadx,
+ void *dady,
void *consts,
void *outputs,
struct tgsi_sampler **samplers);
@@ -65,6 +68,13 @@ struct lp_llvm_fragment_shader
LLVMValueRef function;
lp_shader_fs_func jit_function;
+
+ union tgsi_exec_channel ALIGN16_ATTRIB pos[NUM_CHANNELS];
+ union tgsi_exec_channel ALIGN16_ATTRIB a0[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ union tgsi_exec_channel ALIGN16_ATTRIB dadx[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ union tgsi_exec_channel ALIGN16_ATTRIB dady[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+ uint32_t magic;
};
@@ -84,15 +94,19 @@ shader_generate(struct llvmpipe_screen *screen,
union lp_type type;
LLVMTypeRef elem_type;
LLVMTypeRef vec_type;
- LLVMTypeRef args[4];
- LLVMValueRef inputs_ptr;
+ LLVMTypeRef arg_types[7];
+ LLVMTypeRef func_type;
+ LLVMValueRef pos_ptr;
+ LLVMValueRef a0_ptr;
+ LLVMValueRef dadx_ptr;
+ LLVMValueRef dady_ptr;
LLVMValueRef consts_ptr;
LLVMValueRef outputs_ptr;
LLVMValueRef samplers_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
- LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][4];
- LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][4];
+ LLVMValueRef pos[NUM_CHANNELS];
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
char name[32];
unsigned i, j;
@@ -106,19 +120,31 @@ shader_generate(struct llvmpipe_screen *screen,
elem_type = lp_build_elem_type(type);
vec_type = lp_build_vec_type(type);
- args[0] = LLVMPointerType(vec_type, 0);
- args[1] = LLVMPointerType(elem_type, 0);
- args[2] = LLVMPointerType(vec_type, 0);
- args[3] = LLVMPointerType(LLVMInt8Type(), 0);
- shader->function = LLVMAddFunction(screen->module, "shader", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
- LLVMSetFunctionCallConv(shader->function, LLVMCCallConv);
+ arg_types[0] = LLVMPointerType(vec_type, 0); /* pos */
+ arg_types[1] = LLVMPointerType(vec_type, 0); /* a0 */
+ arg_types[2] = LLVMPointerType(vec_type, 0); /* dadx */
+ arg_types[3] = LLVMPointerType(vec_type, 0); /* dady */
+ arg_types[4] = LLVMPointerType(elem_type, 0); /* consts */
+ arg_types[5] = LLVMPointerType(vec_type, 0); /* outputs */
+ arg_types[6] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */
+
+ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
- inputs_ptr = LLVMGetParam(shader->function, 0);
- consts_ptr = LLVMGetParam(shader->function, 1);
- outputs_ptr = LLVMGetParam(shader->function, 2);
- samplers_ptr = LLVMGetParam(shader->function, 3);
+ shader->function = LLVMAddFunction(screen->module, "shader", func_type);
+ LLVMSetFunctionCallConv(shader->function, LLVMCCallConv);
- LLVMSetValueName(inputs_ptr, "inputs");
+ pos_ptr = LLVMGetParam(shader->function, 0);
+ a0_ptr = LLVMGetParam(shader->function, 1);
+ dadx_ptr = LLVMGetParam(shader->function, 2);
+ dady_ptr = LLVMGetParam(shader->function, 3);
+ consts_ptr = LLVMGetParam(shader->function, 4);
+ outputs_ptr = LLVMGetParam(shader->function, 5);
+ samplers_ptr = LLVMGetParam(shader->function, 6);
+
+ LLVMSetValueName(pos_ptr, "pos");
+ LLVMSetValueName(a0_ptr, "a0");
+ LLVMSetValueName(dadx_ptr, "dadx");
+ LLVMSetValueName(dady_ptr, "dady");
LLVMSetValueName(consts_ptr, "consts");
LLVMSetValueName(outputs_ptr, "outputs");
LLVMSetValueName(samplers_ptr, "samplers");
@@ -127,23 +153,23 @@ shader_generate(struct llvmpipe_screen *screen,
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- for(i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) {
- for(j = 0; j < 4; ++j) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*4 + j, 0);
- util_snprintf(name, sizeof name, "input%u.%c", i, "xywz"[j]);
- inputs[i][j] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, inputs_ptr, &index, 1, ""), name);
- }
+ for(j = 0; j < NUM_CHANNELS; ++j) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), j, 0);
+ util_snprintf(name, sizeof name, "pos.%c", "xyzw"[j]);
+ pos[j] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, pos_ptr, &index, 1, ""), name);
}
memset(outputs, 0, sizeof outputs);
- lp_build_tgsi_soa(builder, tokens, type, inputs, consts_ptr, outputs, samplers_ptr);
+ lp_build_tgsi_soa(builder, tokens, type,
+ pos, a0_ptr, dadx_ptr, dady_ptr,
+ consts_ptr, outputs, samplers_ptr);
for(i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
- for(j = 0; j < 4; ++j) {
+ for(j = 0; j < NUM_CHANNELS; ++j) {
if(outputs[i][j]) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*4 + j, 0);
- util_snprintf(name, sizeof name, "output%u.%c", i, "xywz"[j]);
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*NUM_CHANNELS + j, 0);
+ util_snprintf(name, sizeof name, "output%u.%c", i, "xyzw"[j]);
LLVMBuildStore(builder, outputs[i][j], LLVMBuildGEP(builder, outputs_ptr, &index, 1, name));
}
}
@@ -175,131 +201,56 @@ fs_llvm_prepare( const struct lp_fragment_shader *base,
-
-/**
- * Evaluate a constant-valued coefficient at the position of the
- * current quad.
- */
static void
-eval_constant_coef(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
+setup_pos_vector(struct lp_llvm_fragment_shader *shader,
+ const struct tgsi_interp_coef *coef,
+ float x, float y)
{
- unsigned i;
-
- for( i = 0; i < QUAD_SIZE; i++ ) {
- mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
+ uint chan;
+
+ /* do X */
+ shader->pos[0].f[0] = x;
+ shader->pos[0].f[1] = x + 1;
+ shader->pos[0].f[2] = x;
+ shader->pos[0].f[3] = x + 1;
+
+ /* do Y */
+ shader->pos[1].f[0] = y;
+ shader->pos[1].f[1] = y;
+ shader->pos[1].f[2] = y + 1;
+ shader->pos[1].f[3] = y + 1;
+
+ /* do Z and W for all fragments in the quad */
+ for (chan = 2; chan < 4; chan++) {
+ const float dadx = coef->dadx[chan];
+ const float dady = coef->dady[chan];
+ const float a0 = coef->a0[chan] + dadx * x + dady * y;
+ shader->pos[chan].f[0] = a0;
+ shader->pos[chan].f[1] = a0 + dadx;
+ shader->pos[chan].f[2] = a0 + dady;
+ shader->pos[chan].f[3] = a0 + dadx + dady;
}
}
-/**
- * Evaluate a linear-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_linear_coef(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- const float x = mach->QuadPos.xyzw[0].f[0];
- const float y = mach->QuadPos.xyzw[1].f[0];
- const float dadx = mach->InterpCoefs[attrib].dadx[chan];
- const float dady = mach->InterpCoefs[attrib].dady[chan];
- const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
- mach->Inputs[attrib].xyzw[chan].f[0] = a0;
- mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
- mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
- mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
-}
-
-/**
- * Evaluate a perspective-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_perspective_coef(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- const float x = mach->QuadPos.xyzw[0].f[0];
- const float y = mach->QuadPos.xyzw[1].f[0];
- const float dadx = mach->InterpCoefs[attrib].dadx[chan];
- const float dady = mach->InterpCoefs[attrib].dady[chan];
- const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
- const float *w = mach->QuadPos.xyzw[3].f;
- /* divide by W here */
- mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
- mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
- mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
- mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
-}
-
-
-typedef void
-(*eval_coef_func)(struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan );
-
static void
-exec_declaration(
- struct tgsi_exec_machine *mach,
- const struct tgsi_full_declaration *decl )
+setup_coef_vector(struct lp_llvm_fragment_shader *shader,
+ const struct tgsi_interp_coef *coef)
{
- if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- eval_coef_func eval;
-
- first = decl->DeclarationRange.First;
- last = decl->DeclarationRange.Last;
- mask = decl->Declaration.UsageMask;
-
- switch( decl->Declaration.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- eval = eval_constant_coef;
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- eval = eval_linear_coef;
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- eval = eval_perspective_coef;
- break;
-
- default:
- eval = NULL;
- assert( 0 );
- }
-
- if( mask == TGSI_WRITEMASK_XYZW ) {
- unsigned i, j;
-
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- eval( mach, i, j );
- }
- }
- }
- else {
- unsigned i, j;
-
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- for( i = first; i <= last; i++ ) {
- eval( mach, i, j );
- }
- }
- }
+ unsigned attrib, chan, i;
+
+ for (attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; ++attrib) {
+ for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+ for( i = 0; i < QUAD_SIZE; ++i ) {
+ shader->a0[attrib][chan].f[i] = coef[attrib].a0[chan];
+ shader->dadx[attrib][chan].f[i] = coef[attrib].dadx[chan];
+ shader->dady[attrib][chan].f[i] = coef[attrib].dady[chan];
}
}
}
}
+
/* TODO: codegenerate the whole run function, skip this wrapper.
* TODO: break dependency on tgsi_exec_machine struct
* TODO: push Position calculation into the generated shader
@@ -311,25 +262,24 @@ fs_llvm_run( const struct lp_fragment_shader *base,
struct quad_header *quad )
{
struct lp_llvm_fragment_shader *shader = lp_llvm_fragment_shader(base);
- unsigned i;
unsigned mask;
/* Compute X, Y, Z, W vals for this quad */
- lp_setup_pos_vector(quad->posCoef,
- (float)quad->input.x0, (float)quad->input.y0,
- &machine->QuadPos);
+ setup_pos_vector(shader,
+ quad->posCoef,
+ (float)quad->input.x0, (float)quad->input.y0);
+
+ setup_coef_vector(shader,
+ quad->coef);
/* init kill mask */
tgsi_set_kill_mask(machine, 0x0);
tgsi_set_exec_mask(machine, 1, 1, 1, 1);
- /* execute declarations (interpolants) */
- for (i = 0; i < machine->NumDeclarations; i++)
- exec_declaration( machine, &machine->Declarations[i] );
-
memset(machine->Outputs, 0, sizeof machine->Outputs);
- shader->jit_function( machine->Inputs,
+ shader->jit_function( shader->pos,
+ shader->a0, shader->dadx, shader->dady,
machine->Consts,
machine->Outputs,
machine->Samplers);