diff options
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_tgsi.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 98 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_fs_llvm.c | 244 |
3 files changed, 169 insertions, 178 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 020db003c2..eb50462210 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -44,7 +44,10 @@ void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, union lp_type type, - LLVMValueRef (*inputs)[4], + LLVMValueRef *pos, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr, LLVMValueRef consts_ptr, LLVMValueRef (*outputs)[4], LLVMValueRef samplers_ptr); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index c9143ebfe4..1f489a359b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -69,13 +69,21 @@ struct lp_build_tgsi_soa_context { struct lp_build_context base; - LLVMValueRef (*inputs)[4]; + LLVMValueRef x, y, w; + LLVMValueRef a0_ptr; + LLVMValueRef dadx_ptr; + LLVMValueRef dady_ptr; + LLVMValueRef consts_ptr; - LLVMValueRef (*outputs)[4]; + LLVMValueRef (*outputs)[NUM_CHANNELS]; LLVMValueRef samplers_ptr; - LLVMValueRef immediates[LP_MAX_IMMEDIATES][4]; - LLVMValueRef temps[LP_MAX_TEMPS][4]; + LLVMValueRef oow; + + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + + LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; + LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; /** Coords/texels store */ LLVMValueRef store_ptr; @@ -1339,48 +1347,70 @@ emit_declaration( struct lp_build_tgsi_soa_context *bld, struct tgsi_full_declaration *decl ) { -#if 0 if( decl->Declaration.File == TGSI_FILE_INPUT ) { + LLVMBuilderRef builder = bld->base.builder; unsigned first, last, mask; - unsigned i, j; - LLVMValueRef tmp; + unsigned attrib, chan; first = decl->DeclarationRange.First; last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - bld->inputs[i][j] = bld->interp_coefs[i].a0[j]; - break; + for( attrib = first; attrib <= last; attrib++ ) { + for( chan = 0; chan < NUM_CHANNELS; chan++ ) { + LLVMValueRef input = bld->base.undef; - case TGSI_INTERPOLATE_LINEAR: - tmp = bld->interp_coefs[i].a0[j]; - tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j])); - tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j])); - bld->inputs[i][j] = tmp; - break; + if( mask & (1 << chan) ) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0); + LLVMValueRef a0; + LLVMValueRef dadx; + LLVMValueRef dady; + char name[32]; + switch( decl->Declaration.Interpolate ) { case TGSI_INTERPOLATE_PERSPECTIVE: - tmp = bld->interp_coefs[i].a0[j]; - tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j])); - tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j])); - tmp = lp_build_div(&bld->base, tmp, bld->pos[3]); - bld->inputs[i][j] = tmp; + case TGSI_INTERPOLATE_LINEAR: { + LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, ""); + LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, ""); + util_snprintf(name, sizeof name, "dadx_%u.%c", attrib, "xyzw"[chan]); + dadx = LLVMBuildLoad(builder, dadx_ptr, name); + util_snprintf(name, sizeof name, "dady_%u.%c", attrib, "xyzw"[chan]); + dady = LLVMBuildLoad(builder, dady_ptr, name); + } + + case TGSI_INTERPOLATE_CONSTANT: { + LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, ""); + util_snprintf(name, sizeof name, "a0_%u.%c", attrib, "xyzw"[chan]); + a0 = LLVMBuildLoad(builder, a0_ptr, name); break; + } default: - assert( 0 ); - break; + assert(0); + break; + } + + input = a0; + + if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) { + input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx)); + input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady)); } + + if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { + if(!bld->oow) + bld->oow = lp_build_rcp(&bld->base, bld->w); + input = lp_build_mul(&bld->base, input, bld->oow); + } + + util_snprintf(name, sizeof name, "input%u.%c", attrib, "xyzw"[chan]); + LLVMSetValueName(input, name); } + + bld->inputs[attrib][chan] = input; } } } -#endif } /** @@ -1396,7 +1426,10 @@ void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, union lp_type type, - LLVMValueRef (*inputs)[4], + LLVMValueRef *pos, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr, LLVMValueRef consts_ptr, LLVMValueRef (*outputs)[4], LLVMValueRef samplers_ptr) @@ -1409,7 +1442,12 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, /* Setup build context */ memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.base, builder, type); - bld.inputs = inputs; + bld.x = pos[0]; + bld.y = pos[1]; + bld.w = pos[3]; + bld.a0_ptr = a0_ptr; + bld.dadx_ptr = dadx_ptr; + bld.dady_ptr = dady_ptr; bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.samplers_ptr = samplers_ptr; diff --git a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c b/src/gallium/drivers/llvmpipe/lp_fs_llvm.c index 2c1a849785..ef1c8c32c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_fs_llvm.c @@ -47,7 +47,10 @@ typedef void -(*lp_shader_fs_func)(void *inputs, +(*lp_shader_fs_func)(void *pos, + void *a0, + void *dadx, + void *dady, void *consts, void *outputs, struct tgsi_sampler **samplers); @@ -65,6 +68,13 @@ struct lp_llvm_fragment_shader LLVMValueRef function; lp_shader_fs_func jit_function; + + union tgsi_exec_channel ALIGN16_ATTRIB pos[NUM_CHANNELS]; + union tgsi_exec_channel ALIGN16_ATTRIB a0[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + union tgsi_exec_channel ALIGN16_ATTRIB dadx[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + union tgsi_exec_channel ALIGN16_ATTRIB dady[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + + uint32_t magic; }; @@ -84,15 +94,19 @@ shader_generate(struct llvmpipe_screen *screen, union lp_type type; LLVMTypeRef elem_type; LLVMTypeRef vec_type; - LLVMTypeRef args[4]; - LLVMValueRef inputs_ptr; + LLVMTypeRef arg_types[7]; + LLVMTypeRef func_type; + LLVMValueRef pos_ptr; + LLVMValueRef a0_ptr; + LLVMValueRef dadx_ptr; + LLVMValueRef dady_ptr; LLVMValueRef consts_ptr; LLVMValueRef outputs_ptr; LLVMValueRef samplers_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; - LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][4]; - LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][4]; + LLVMValueRef pos[NUM_CHANNELS]; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; char name[32]; unsigned i, j; @@ -106,19 +120,31 @@ shader_generate(struct llvmpipe_screen *screen, elem_type = lp_build_elem_type(type); vec_type = lp_build_vec_type(type); - args[0] = LLVMPointerType(vec_type, 0); - args[1] = LLVMPointerType(elem_type, 0); - args[2] = LLVMPointerType(vec_type, 0); - args[3] = LLVMPointerType(LLVMInt8Type(), 0); - shader->function = LLVMAddFunction(screen->module, "shader", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); - LLVMSetFunctionCallConv(shader->function, LLVMCCallConv); + arg_types[0] = LLVMPointerType(vec_type, 0); /* pos */ + arg_types[1] = LLVMPointerType(vec_type, 0); /* a0 */ + arg_types[2] = LLVMPointerType(vec_type, 0); /* dadx */ + arg_types[3] = LLVMPointerType(vec_type, 0); /* dady */ + arg_types[4] = LLVMPointerType(elem_type, 0); /* consts */ + arg_types[5] = LLVMPointerType(vec_type, 0); /* outputs */ + arg_types[6] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - inputs_ptr = LLVMGetParam(shader->function, 0); - consts_ptr = LLVMGetParam(shader->function, 1); - outputs_ptr = LLVMGetParam(shader->function, 2); - samplers_ptr = LLVMGetParam(shader->function, 3); + shader->function = LLVMAddFunction(screen->module, "shader", func_type); + LLVMSetFunctionCallConv(shader->function, LLVMCCallConv); - LLVMSetValueName(inputs_ptr, "inputs"); + pos_ptr = LLVMGetParam(shader->function, 0); + a0_ptr = LLVMGetParam(shader->function, 1); + dadx_ptr = LLVMGetParam(shader->function, 2); + dady_ptr = LLVMGetParam(shader->function, 3); + consts_ptr = LLVMGetParam(shader->function, 4); + outputs_ptr = LLVMGetParam(shader->function, 5); + samplers_ptr = LLVMGetParam(shader->function, 6); + + LLVMSetValueName(pos_ptr, "pos"); + LLVMSetValueName(a0_ptr, "a0"); + LLVMSetValueName(dadx_ptr, "dadx"); + LLVMSetValueName(dady_ptr, "dady"); LLVMSetValueName(consts_ptr, "consts"); LLVMSetValueName(outputs_ptr, "outputs"); LLVMSetValueName(samplers_ptr, "samplers"); @@ -127,23 +153,23 @@ shader_generate(struct llvmpipe_screen *screen, builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - for(i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) { - for(j = 0; j < 4; ++j) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*4 + j, 0); - util_snprintf(name, sizeof name, "input%u.%c", i, "xywz"[j]); - inputs[i][j] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, inputs_ptr, &index, 1, ""), name); - } + for(j = 0; j < NUM_CHANNELS; ++j) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), j, 0); + util_snprintf(name, sizeof name, "pos.%c", "xyzw"[j]); + pos[j] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, pos_ptr, &index, 1, ""), name); } memset(outputs, 0, sizeof outputs); - lp_build_tgsi_soa(builder, tokens, type, inputs, consts_ptr, outputs, samplers_ptr); + lp_build_tgsi_soa(builder, tokens, type, + pos, a0_ptr, dadx_ptr, dady_ptr, + consts_ptr, outputs, samplers_ptr); for(i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) { - for(j = 0; j < 4; ++j) { + for(j = 0; j < NUM_CHANNELS; ++j) { if(outputs[i][j]) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*4 + j, 0); - util_snprintf(name, sizeof name, "output%u.%c", i, "xywz"[j]); + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*NUM_CHANNELS + j, 0); + util_snprintf(name, sizeof name, "output%u.%c", i, "xyzw"[j]); LLVMBuildStore(builder, outputs[i][j], LLVMBuildGEP(builder, outputs_ptr, &index, 1, name)); } } @@ -175,131 +201,56 @@ fs_llvm_prepare( const struct lp_fragment_shader *base, - -/** - * Evaluate a constant-valued coefficient at the position of the - * current quad. - */ static void -eval_constant_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) +setup_pos_vector(struct lp_llvm_fragment_shader *shader, + const struct tgsi_interp_coef *coef, + float x, float y) { - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + uint chan; + + /* do X */ + shader->pos[0].f[0] = x; + shader->pos[0].f[1] = x + 1; + shader->pos[0].f[2] = x; + shader->pos[0].f[3] = x + 1; + + /* do Y */ + shader->pos[1].f[0] = y; + shader->pos[1].f[1] = y; + shader->pos[1].f[2] = y + 1; + shader->pos[1].f[3] = y + 1; + + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + shader->pos[chan].f[0] = a0; + shader->pos[chan].f[1] = a0 + dadx; + shader->pos[chan].f[2] = a0 + dady; + shader->pos[chan].f[3] = a0 + dadx + dady; } } -/** - * Evaluate a linear-valued coefficient at the position of the - * current quad. - */ -static void -eval_linear_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0; - mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; - mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; - mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; -} - -/** - * Evaluate a perspective-valued coefficient at the position of the - * current quad. - */ -static void -eval_perspective_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - const float *w = mach->QuadPos.xyzw[3].f; - /* divide by W here */ - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; - mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; - mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; -} - - -typedef void -(*eval_coef_func)(struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ); - static void -exec_declaration( - struct tgsi_exec_machine *mach, - const struct tgsi_full_declaration *decl ) +setup_coef_vector(struct lp_llvm_fragment_shader *shader, + const struct tgsi_interp_coef *coef) { - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - eval_coef_func eval; - - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; - mask = decl->Declaration.UsageMask; - - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - eval = eval_constant_coef; - break; - - case TGSI_INTERPOLATE_LINEAR: - eval = eval_linear_coef; - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - eval = eval_perspective_coef; - break; - - default: - eval = NULL; - assert( 0 ); - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - eval( mach, i, j ); - } - } - } - else { - unsigned i, j; - - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - eval( mach, i, j ); - } - } - } + unsigned attrib, chan, i; + + for (attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; ++attrib) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + for( i = 0; i < QUAD_SIZE; ++i ) { + shader->a0[attrib][chan].f[i] = coef[attrib].a0[chan]; + shader->dadx[attrib][chan].f[i] = coef[attrib].dadx[chan]; + shader->dady[attrib][chan].f[i] = coef[attrib].dady[chan]; } } } } + /* TODO: codegenerate the whole run function, skip this wrapper. * TODO: break dependency on tgsi_exec_machine struct * TODO: push Position calculation into the generated shader @@ -311,25 +262,24 @@ fs_llvm_run( const struct lp_fragment_shader *base, struct quad_header *quad ) { struct lp_llvm_fragment_shader *shader = lp_llvm_fragment_shader(base); - unsigned i; unsigned mask; /* Compute X, Y, Z, W vals for this quad */ - lp_setup_pos_vector(quad->posCoef, - (float)quad->input.x0, (float)quad->input.y0, - &machine->QuadPos); + setup_pos_vector(shader, + quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0); + + setup_coef_vector(shader, + quad->coef); /* init kill mask */ tgsi_set_kill_mask(machine, 0x0); tgsi_set_exec_mask(machine, 1, 1, 1, 1); - /* execute declarations (interpolants) */ - for (i = 0; i < machine->NumDeclarations; i++) - exec_declaration( machine, &machine->Declarations[i] ); - memset(machine->Outputs, 0, sizeof machine->Outputs); - shader->jit_function( machine->Inputs, + shader->jit_function( shader->pos, + shader->a0, shader->dadx, shader->dady, machine->Consts, machine->Outputs, machine->Samplers); |