diff options
33 files changed, 347 insertions, 23 deletions
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 943ec44bcd..41269ee869 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -437,6 +437,7 @@ generate_vs(struct draw_llvm *llvm, LLVMBuilderRef builder, LLVMValueRef (*outputs)[NUM_CHANNELS], const LLVMValueRef (*inputs)[NUM_CHANNELS], + LLVMValueRef system_values_array, LLVMValueRef context_ptr, struct lp_build_sampler_soa *draw_sampler) { @@ -468,6 +469,7 @@ generate_vs(struct draw_llvm *llvm, vs_type, NULL /*struct lp_build_mask_context *mask*/, consts_ptr, + system_values_array, NULL /*pos*/, inputs, outputs, @@ -1118,7 +1120,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef start, end, count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; LLVMValueRef instance_id; + LLVMValueRef system_values_array; struct draw_context *draw = llvm->draw; + const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info; unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; @@ -1179,6 +1183,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_build_context_init(&bld, llvm->gallivm, lp_type_int(32)); + system_values_array = lp_build_system_values_array(gallivm, vs_info, + instance_id, NULL); + end = lp_build_add(&bld, start, count); step = lp_build_const_int32(gallivm, max_vertices); @@ -1233,6 +1240,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) builder, outputs, ptr_aos, + system_values_array, context_ptr, sampler); @@ -1263,8 +1271,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) /* store clipmask in vertex header and positions in data */ convert_to_aos(gallivm, io, outputs, clipmask, - draw->vs.vertex_shader->info.num_outputs, - max_vertices); + vs_info->num_outputs, max_vertices); } lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE); @@ -1315,7 +1322,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; LLVMValueRef instance_id; + LLVMValueRef system_values_array; struct draw_context *draw = llvm->draw; + const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info; unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; @@ -1376,6 +1385,10 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian lp_build_context_init(&bld, gallivm, lp_type_int(32)); + system_values_array = lp_build_system_values_array(gallivm, vs_info, + instance_id, NULL); + + step = lp_build_const_int32(gallivm, max_vertices); /* code generated texture sampling */ @@ -1438,6 +1451,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder, outputs, ptr_aos, + system_values_array, context_ptr, sampler); @@ -1471,8 +1485,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian * and transformed positions in data */ convert_to_aos(gallivm, io, outputs, clipmask, - draw->vs.vertex_shader->info.num_outputs, - max_vertices); + vs_info->num_outputs, max_vertices); } lp_build_loop_end_cond(&lp_loop, fetch_count, step, LLVMIntUGE); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 667eb50785..c41d7c42a0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -99,6 +99,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, constants, const_size); + if (shader->info.uses_instanceid) { + unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID]; + assert(i < Elements(machine->SystemValue)); + machine->SystemValue[i][0] = shader->draw->instance_id; + } + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index dee7c0da9b..d55b9b0807 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -71,6 +71,12 @@ vs_sse_prepare( struct draw_vertex_shader *base, struct tgsi_exec_machine *machine = shader->machine; machine->Samplers = draw->vs.samplers; + + if (base->info.uses_instanceid) { + unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID]; + assert(i < Elements(machine->SystemValue)); + machine->SystemValue[i][0] = base->draw->instance_id; + } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 40186befb9..9713d10048 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -180,6 +180,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, + LLVMValueRef system_values_array, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], @@ -199,4 +200,11 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, const struct tgsi_shader_info *info); +LLVMValueRef +lp_build_system_values_array(struct gallivm_state *gallivm, + const struct tgsi_shader_info *info, + LLVMValueRef instance_id, + LLVMValueRef facing); + + #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 1b5a8a5903..d1585c8e2b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -157,6 +157,8 @@ struct lp_build_tgsi_soa_context */ LLVMValueRef inputs_array; + LLVMValueRef system_values_array; + const struct tgsi_shader_info *info; /** bitmask indicating which register files are accessed indirectly */ unsigned indirect_files; @@ -759,6 +761,23 @@ emit_fetch( } break; + case TGSI_FILE_SYSTEM_VALUE: + assert(!reg->Register.Indirect); + { + LLVMValueRef index; /* index into the system value array */ + LLVMValueRef scalar, scalar_ptr; + + index = lp_build_const_int32(gallivm, + reg->Register.Index * 4 + swizzle); + + scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, + &index, 1, ""); + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + + res = lp_build_broadcast_scalar(&bld->base, scalar); + } + break; + default: assert(0 && "invalid src register in emit_fetch()"); return bld->base.undef; @@ -2322,6 +2341,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, + LLVMValueRef system_values_array, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], @@ -2411,6 +2431,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, } } + bld.system_values_array = system_values_array; + tgsi_parse_init( &parse, tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { @@ -2512,3 +2534,54 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, FREE( bld.instructions ); } + +/** + * Build up the system values array out of individual values such as + * the instance ID, front-face, primitive ID, etc. The shader info is + * used to determine which system values are needed and where to put + * them in the system values array. + * + * XXX only instance ID is implemented at this time. + * + * The system values register file is similar to the constants buffer. + * Example declaration: + * DCL SV[0], INSTANCEID + * Example instruction: + * MOVE foo, SV[0].xxxx; + * + * \return LLVM float array (interpreted as float [][4]) + */ +LLVMValueRef +lp_build_system_values_array(struct gallivm_state *gallivm, + const struct tgsi_shader_info *info, + LLVMValueRef instance_id, + LLVMValueRef facing) +{ + LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values); + LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context); + LLVMValueRef array = lp_build_array_alloca(gallivm, float_t, + size, "sysvals_array"); + unsigned i; + + for (i = 0; i < info->num_system_values; i++) { + LLVMValueRef index = lp_build_const_int32(gallivm, i * 4); + LLVMValueRef ptr, value; + + switch (info->system_value_semantic_name[i]) { + case TGSI_SEMANTIC_INSTANCEID: + /* convert instance ID from int to float */ + value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t, + "sysval_instanceid"); + break; + case TGSI_SEMANTIC_FACE: + /* fall-through */ + default: + assert(0 && "unexpected semantic in build_system_values_array()"); + } + + ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, ""); + LLVMBuildStore(gallivm->builder, value, ptr); + } + + return array; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 7892a67f04..35b2742351 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1038,7 +1038,6 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, break; case TGSI_FILE_INPUT: - case TGSI_FILE_SYSTEM_VALUE: for (i = 0; i < QUAD_SIZE; i++) { /* if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { @@ -1053,6 +1052,15 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_SYSTEM_VALUE: + /* XXX no swizzling at this point. Will be needed if we put + * gl_FragCoord, for example, in a sys value register. + */ + for (i = 0; i < QUAD_SIZE; i++) { + chan->f[i] = mach->SystemValue[index->i[i]][0]; + } + break; + case TGSI_FILE_TEMPORARY: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); @@ -1907,8 +1915,7 @@ exec_declaration(struct tgsi_exec_machine *mach, const struct tgsi_full_declaration *decl) { if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - if (decl->Declaration.File == TGSI_FILE_INPUT || - decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + if (decl->Declaration.File == TGSI_FILE_INPUT) { uint first, last, mask; first = decl->Range.First; @@ -1921,6 +1928,7 @@ exec_declaration(struct tgsi_exec_machine *mach, * ureg code to emit the right UsageMask value (WRITEMASK_X). * Then, we could remove the tgsi_exec_machine::Face field. */ + /* XXX make FACE a system value */ if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { uint i; @@ -1962,8 +1970,13 @@ exec_declaration(struct tgsi_exec_machine *mach, } } } + + if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First; + } } + typedef void (* micro_op)(union tgsi_exec_channel *dst); static void diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index b5ebbfbfaa..6c204c7371 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -31,6 +31,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" #if defined __cplusplus extern "C" { @@ -181,6 +182,8 @@ struct tgsi_sampler /* The maximum total number of vertices */ #define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS) +#define TGSI_MAX_MISC_INPUTS 8 + /** function call/activation record */ struct tgsi_call_record { @@ -228,6 +231,10 @@ struct tgsi_exec_machine struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; + /* System values */ + unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT]; + float SystemValue[TGSI_MAX_MISC_INPUTS][4]; + struct tgsi_exec_vector *Addrs; struct tgsi_exec_vector *Predicates; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 3521847b61..537a0f6c5e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -294,7 +294,6 @@ emit_fetch(struct gen_context *gen, case TGSI_SWIZZLE_W: switch (reg->Register.File) { case TGSI_FILE_INPUT: - case TGSI_FILE_SYSTEM_VALUE: { int offset = (reg->Register.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); @@ -302,6 +301,9 @@ emit_fetch(struct gen_context *gen, ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); } break; + case TGSI_FILE_SYSTEM_VALUE: + assert(!"unhandled system value in tgsi_ppc.c"); + break; case TGSI_FILE_TEMPORARY: if (is_ppc_vec_temporary(reg)) { /* use PPC vec register */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 6585da3e83..83c6ac75e5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -143,7 +143,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_count[file]++; info->file_max[file] = MAX2(info->file_max[file], (int)reg); - if (file == TGSI_FILE_INPUT || file == TGSI_FILE_SYSTEM_VALUE) { + if (file == TGSI_FILE_INPUT) { info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; @@ -151,6 +151,23 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap; info->num_inputs++; } + else if (file == TGSI_FILE_SYSTEM_VALUE) { + unsigned index = fulldecl->Range.First; + unsigned semName = fulldecl->Semantic.Name; + + info->system_value_semantic_name[index] = semName; + info->num_system_values = MAX2(info->num_system_values, + index + 1); + + /* + info->system_value_semantic_name[info->num_system_values++] = + fulldecl->Semantic.Name; + */ + + if (fulldecl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + info->uses_instanceid = TRUE; + } + } else if (file == TGSI_FILE_OUTPUT) { info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 104097fbc0..53ab3d509d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -51,6 +51,9 @@ struct tgsi_shader_info ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + ubyte num_system_values; + ubyte system_value_semantic_name[PIPE_MAX_SHADER_INPUTS]; + uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ @@ -64,6 +67,7 @@ struct tgsi_shader_info boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ + boolean uses_instanceid; /** * Bitmask indicating which register files are accessed with diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 086d983a73..3f2cda860e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -163,6 +163,14 @@ get_immediate_base( void ) reg_DX ); } +static struct x86_reg +get_system_value_base( void ) +{ + return x86_make_disp( + get_machine_base(), + Offset(struct tgsi_exec_machine, SystemValue) ); +} + /** * Data access helpers. @@ -229,6 +237,16 @@ get_temp( } static struct x86_reg +get_system_value( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_system_value_base(), /* base */ + (vec * 4 + chan) * 4 ); /* byte offset from base */ +} + +static struct x86_reg get_coef( unsigned vec, unsigned chan, @@ -423,6 +441,30 @@ emit_tempf( } /** + * Copy a system value to xmm register + * \param xmm the destination xmm register + * \param vec the source system value register + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_system_value( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_system_value( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +/** * Load an xmm register with an input attrib coefficient (a0, dadx or dady) * \param xmm the destination xmm register * \param vec the src input/attribute coefficient index @@ -1281,8 +1323,15 @@ emit_fetch( swizzle ); break; - case TGSI_FILE_INPUT: case TGSI_FILE_SYSTEM_VALUE: + emit_system_value( + func, + xmm, + reg->Register.Index, + swizzle ); + break; + + case TGSI_FILE_INPUT: emit_inputf( func, xmm, @@ -2636,8 +2685,7 @@ emit_declaration( struct x86_function *func, struct tgsi_full_declaration *decl ) { - if( decl->Declaration.File == TGSI_FILE_INPUT || - decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { unsigned first, last, mask; unsigned i, j; diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 09edbaa673..ab90097add 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -36,6 +36,7 @@ The integer capabilities: bound. * ``OCCLUSION_QUERY``: Whether occlusion queries are available. * ``TIMER_QUERY``: Whether timer queries are available. +* ``INSTANCED_DRAWING``: indicates support for instanced drawing. * ``TEXTURE_SHADOW_MAP``: indicates whether the fragment shader hardware can do the depth texture / Z comparison operation in TEX instructions for shadow testing. diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 9459a3cd11..b6919a5c6d 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -164,6 +164,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_DEPTH_CLAMP: return 0; + case PIPE_CAP_INSTANCED_DRAWING: + return 1; default: return 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ae207617cc..1b9119eda0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -334,7 +334,8 @@ generate_fs(struct gallivm_state *gallivm, /* Build the actual shader */ lp_build_tgsi_soa(gallivm, tokens, type, &mask, - consts_ptr, interp->pos, interp->inputs, + consts_ptr, NULL, /* sys values array */ + interp->pos, interp->inputs, outputs, sampler, &shader->info.base); /* Alpha test */ diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index cbdea19af4..e19f2e6fc7 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -123,6 +123,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 0; case PIPE_CAP_SHADER_STENCIL_EXPORT: return 1; + case PIPE_CAP_INSTANCED_DRAWING: + return 1; default: return 0; } diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index f5af15ff9f..2135c19dcf 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -462,6 +462,7 @@ enum pipe_cap { PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER, PIPE_CAP_DEPTH_CLAMP, PIPE_CAP_SHADER_STENCIL_EXPORT, + PIPE_CAP_INSTANCED_DRAWING, }; /* Shader caps not specific to any single stage */ diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 8dbe66927d..cbeacd5633 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -180,6 +180,15 @@ _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp, state->ARB_draw_buffers_enable = (ext_mode != extension_disable); state->ARB_draw_buffers_warn = (ext_mode == extension_warn); } + } else if (strcmp(name, "GL_ARB_draw_instanced") == 0) { + /* This extension is only supported in vertex shaders. + */ + if (state->target != vertex_shader) { + unsupported = true; + } else { + state->ARB_draw_instanced_enable = (ext_mode != extension_disable); + state->ARB_draw_instanced_warn = (ext_mode == extension_warn); + } } else if (strcmp(name, "GL_ARB_explicit_attrib_location") == 0) { state->ARB_explicit_attrib_location_enable = (ext_mode != extension_disable); diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 39c10b99e8..030d27a262 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -132,6 +132,8 @@ struct _mesa_glsl_parse_state { /*@{*/ unsigned ARB_draw_buffers_enable:1; unsigned ARB_draw_buffers_warn:1; + unsigned ARB_draw_instanced_enable:1; + unsigned ARB_draw_instanced_warn:1; unsigned ARB_explicit_attrib_location_enable:1; unsigned ARB_explicit_attrib_location_warn:1; unsigned ARB_fragment_coord_conventions_enable:1; diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 57e89f0f6b..7b83c2836e 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -225,6 +225,7 @@ enum ir_variable_mode { ir_var_in, ir_var_out, ir_var_inout, + ir_var_system_value, /**< Ex: front-face, instance-id, etc. */ ir_var_temporary /**< Temporary variable generated during compilation. */ }; diff --git a/src/glsl/ir_set_program_inouts.cpp b/src/glsl/ir_set_program_inouts.cpp index 714281539a..085456533c 100644 --- a/src/glsl/ir_set_program_inouts.cpp +++ b/src/glsl/ir_set_program_inouts.cpp @@ -79,6 +79,8 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len) for (int i = 0; i < len; i++) { if (var->mode == ir_var_in) prog->InputsRead |= BITFIELD64_BIT(var->location + offset + i); + else if (var->mode == ir_var_system_value) + prog->SystemValuesRead |= (1 << (var->location + offset + i)); else prog->OutputsWritten |= BITFIELD64_BIT(var->location + offset + i); } @@ -134,7 +136,8 @@ ir_visitor_status ir_set_program_inouts_visitor::visit(ir_variable *ir) { if (ir->mode == ir_var_in || - ir->mode == ir_var_out) { + ir->mode == ir_var_out || + ir->mode == ir_var_system_value) { hash_table_insert(this->ht, ir, ir); } @@ -158,5 +161,6 @@ do_set_program_inouts(exec_list *instructions, struct gl_program *prog) prog->InputsRead = 0; prog->OutputsWritten = 0; + prog->SystemValuesRead = 0; visit_list_elements(&v, instructions); } diff --git a/src/glsl/ir_variable.cpp b/src/glsl/ir_variable.cpp index 5b8281e16e..73da28faf4 100644 --- a/src/glsl/ir_variable.cpp +++ b/src/glsl/ir_variable.cpp @@ -30,6 +30,11 @@ static void generate_ARB_draw_buffers_variables(exec_list *, struct _mesa_glsl_parse_state *, bool, _mesa_glsl_parser_targets); +static void +generate_ARB_draw_instanced_variables(exec_list *, + struct _mesa_glsl_parse_state *, + bool, _mesa_glsl_parser_targets); + static ir_variable * add_variable(const char *name, enum ir_variable_mode mode, int slot, const glsl_type *type, exec_list *instructions, @@ -41,6 +46,7 @@ add_variable(const char *name, enum ir_variable_mode mode, int slot, case ir_var_auto: case ir_var_in: case ir_var_uniform: + case ir_var_system_value: var->read_only = true; break; case ir_var_inout: @@ -324,8 +330,13 @@ initialize_vs_variables(exec_list *instructions, generate_130_vs_variables(instructions, state); break; } + + if (state->ARB_draw_instanced_enable) + generate_ARB_draw_instanced_variables(instructions, state, false, + vertex_shader); } + /* This function should only be called for ES, not desktop GL. */ static void generate_100ES_fs_variables(exec_list *instructions, @@ -422,6 +433,27 @@ generate_ARB_draw_buffers_variables(exec_list *instructions, } } + +static void +generate_ARB_draw_instanced_variables(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + bool warn, + _mesa_glsl_parser_targets target) +{ + /* gl_InstanceIDARB is only available in the vertex shader. + */ + if (target == vertex_shader) { + ir_variable *const inst = + add_variable("gl_InstanceIDARB", ir_var_system_value, + SYSTEM_VALUE_INSTANCE_ID, + glsl_type::int_type, instructions, state->symbols); + + if (warn) + inst->warn_extension = "GL_ARB_draw_instanced"; + } +} + + static void generate_ARB_shader_stencil_export_variables(exec_list *instructions, struct _mesa_glsl_parse_state *state, diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index 2d146caca5..9b041aafe4 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -73,6 +73,7 @@ initialize_context(struct gl_context *ctx, gl_api api) ctx->API = api; ctx->Extensions.ARB_draw_buffers = GL_TRUE; + ctx->Extensions.ARB_draw_instanced = GL_TRUE; ctx->Extensions.ARB_fragment_coord_conventions = GL_TRUE; ctx->Extensions.EXT_texture_array = GL_TRUE; ctx->Extensions.NV_texture_rectangle = GL_TRUE; diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index c41ae4ad2d..24404993c6 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -380,6 +380,7 @@ _mesa_enable_sw_extensions(struct gl_context *ctx) ctx->Extensions.ARB_depth_texture = GL_TRUE; /*ctx->Extensions.ARB_draw_buffers = GL_TRUE;*/ ctx->Extensions.ARB_draw_elements_base_vertex = GL_TRUE; + ctx->Extensions.ARB_draw_instanced = GL_TRUE; ctx->Extensions.ARB_explicit_attrib_location = GL_TRUE; ctx->Extensions.ARB_fragment_coord_conventions = GL_TRUE; #if FEATURE_ARB_fragment_program diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index f76e1eb2ee..1522332e8a 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1748,11 +1748,24 @@ typedef enum PROGRAM_WRITE_ONLY, /**< A dummy, write-only register */ PROGRAM_ADDRESS, /**< machine->AddressReg */ PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */ + PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */ PROGRAM_UNDEFINED, /**< Invalid/TBD value */ PROGRAM_FILE_MAX } gl_register_file; +/** + * If the register file is PROGRAM_SYSTEM_VALUE, the register index will be + * one of these values. + */ +typedef enum +{ + SYSTEM_VALUE_FRONT_FACE, /**< Fragment shader only (not done yet) */ + SYSTEM_VALUE_INSTANCE_ID, /**< Vertex shader only */ + SYSTEM_VALUE_MAX /**< Number of values */ +} gl_system_value; + + /** Vertex and fragment instructions */ struct prog_instruction; struct gl_program_parameter_list; @@ -1775,6 +1788,7 @@ struct gl_program GLbitfield InputsRead; /**< Bitmask of which input regs are read */ GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ + GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */ diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 0929b95950..786fdfbd23 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1462,6 +1462,7 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir) case ir_var_in: case ir_var_out: case ir_var_inout: + case ir_var_system_value: /* The linker assigns locations for varyings and attributes, * including deprecated builtins (like gl_Color), user-assign * generic attributes (glBindVertexLocation), and @@ -1484,6 +1485,10 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir) ir->var->type->gl_type, ir->var->location - VERT_ATTRIB_GENERIC0); } + } else if (ir->var->mode == ir_var_system_value) { + entry = new(mem_ctx) variable_storage(ir->var, + PROGRAM_SYSTEM_VALUE, + ir->var->location); } else { entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_OUTPUT, diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index dd15e9a1cc..e7553c69db 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -159,6 +159,10 @@ get_src_register_pointer(const struct prog_src_register *source, return ZeroVec; return prog->Parameters->ParameterValues[reg]; + case PROGRAM_SYSTEM_VALUE: + assert(reg < Elements(machine->SystemValues)); + return machine->SystemValues[reg]; + default: _mesa_problem(NULL, "Invalid src register file %d in get_src_register_pointer()", diff --git a/src/mesa/program/prog_execute.h b/src/mesa/program/prog_execute.h index cefd468c36..cdf37082a0 100644 --- a/src/mesa/program/prog_execute.h +++ b/src/mesa/program/prog_execute.h @@ -61,6 +61,7 @@ struct gl_program_machine GLfloat (*EnvParams)[4]; /**< Vertex or Fragment env parameters */ GLuint CondCodes[4]; /**< COND_* value for x/y/z/w */ GLint AddressReg[MAX_PROGRAM_ADDRESS_REGS][4]; + GLfloat SystemValues[SYSTEM_VALUE_MAX][4]; const GLubyte *Samplers; /** Array mapping sampler var to tex unit */ diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index abebf392c0..484596af76 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -72,6 +72,8 @@ _mesa_register_file_name(gl_register_file f) return "ADDR"; case PROGRAM_SAMPLER: return "SAMPLER"; + case PROGRAM_SYSTEM_VALUE: + return "SYSVAL"; case PROGRAM_UNDEFINED: return "UNDEFINED"; default: @@ -310,6 +312,9 @@ reg_string(gl_register_file f, GLint index, gl_prog_print_mode mode, case PROGRAM_UNIFORM: /* extension */ sprintf(str, "uniform[%s%d]", addr, index); break; + case PROGRAM_SYSTEM_VALUE: + sprintf(str, "sysvalue[%s%d]", addr, index); + break; case PROGRAM_STATE_VAR: { struct gl_program_parameter *param diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d240cab1a7..df4f5cf8b5 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -461,4 +461,8 @@ void st_init_extensions(struct st_context *st) if (screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) { ctx->Extensions.ARB_shader_stencil_export = GL_TRUE; } + + if (screen->get_param(screen, PIPE_CAP_INSTANCED_DRAWING)) { + ctx->Extensions.ARB_draw_instanced = GL_TRUE; + } } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index f848462310..42f1c2017f 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -72,6 +72,7 @@ struct st_translate { struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[1]; struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + struct ureg_src systemValues[SYSTEM_VALUE_MAX]; /* Extra info for handling point size clamping in vertex shader */ struct ureg_dst pointSizeResult; /**< Actual point size output register */ @@ -104,6 +105,13 @@ struct st_translate { }; +/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ +static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { + TGSI_SEMANTIC_FACE, + TGSI_SEMANTIC_INSTANCEID +}; + + /** * Make note of a branch to a label in the TGSI code. * After we've emitted all instructions, we'll go over the list @@ -245,6 +253,10 @@ src_register( struct st_translate *t, case PROGRAM_ADDRESS: return ureg_src(t->address[index]); + case PROGRAM_SYSTEM_VALUE: + assert(index < Elements(t->systemValues)); + return t->systemValues[index]; + default: debug_assert( 0 ); return ureg_src_undef(); @@ -1089,6 +1101,21 @@ st_translate_mesa_program( t->address[0] = ureg_DECL_address( ureg ); } + /* Declare misc input registers + */ + { + GLbitfield sysInputs = program->SystemValuesRead; + unsigned numSys = 0; + for (i = 0; sysInputs; i++) { + if (sysInputs & (1 << i)) { + unsigned semName = mesa_sysval_to_semantic[i]; + t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); + numSys++; + sysInputs &= ~(1 << i); + } + } + } + if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) { /* If temps are accessed with indirect addressing, declare temporaries * in sequential order. Else, we declare them on demand elsewhere. diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h index bc01646247..6a9444216c 100644 --- a/src/mesa/tnl/t_context.h +++ b/src/mesa/tnl/t_context.h @@ -527,6 +527,8 @@ typedef struct GLubyte *block[VERT_ATTRIB_MAX]; GLuint nr_blocks; + GLuint CurInstance; + } TNLcontext; diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index 30f1bf323c..bdb893eba2 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -453,6 +453,7 @@ void _tnl_draw_prims( struct gl_context *ctx, */ struct gl_buffer_object *bo[VERT_ATTRIB_MAX + 1]; GLuint nr_bo = 0; + GLuint inst; for (i = 0; i < nr_prims;) { GLuint this_nr_prims; @@ -470,15 +471,19 @@ void _tnl_draw_prims( struct gl_context *ctx, /* Binding inputs may imply mapping some vertex buffer objects. * They will need to be unmapped below. */ - bind_prims(ctx, &prim[i], this_nr_prims); - bind_inputs(ctx, arrays, max_index + prim[i].basevertex + 1, - bo, &nr_bo); - bind_indices(ctx, ib, bo, &nr_bo); + for (inst = 0; inst < prim[i].num_instances; inst++) { - TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx); + bind_prims(ctx, &prim[i], this_nr_prims); + bind_inputs(ctx, arrays, max_index + prim[i].basevertex + 1, + bo, &nr_bo); + bind_indices(ctx, ib, bo, &nr_bo); - unmap_vbos(ctx, bo, nr_bo); - free_space(ctx); + tnl->CurInstance = inst; + TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx); + + unmap_vbos(ctx, bo, nr_bo); + free_space(ctx); + } i += this_nr_prims; } diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index 94372bbafb..f5d8f7477a 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -220,7 +220,8 @@ _tnl_program_string(struct gl_context *ctx, GLenum target, struct gl_program *pr * Initialize virtual machine state prior to executing vertex program. */ static void -init_machine(struct gl_context *ctx, struct gl_program_machine *machine) +init_machine(struct gl_context *ctx, struct gl_program_machine *machine, + GLuint instID) { /* Input registers get initialized from the current vertex attribs */ memcpy(machine->VertAttribs, ctx->Current.Attrib, @@ -256,6 +257,8 @@ init_machine(struct gl_context *ctx, struct gl_program_machine *machine) machine->FetchTexelDeriv = NULL; /* not used by vertex programs */ machine->Samplers = ctx->VertexProgram._Current->Base.SamplerUnits; + + machine->SystemValues[SYSTEM_VALUE_INSTANCE_ID][0] = (GLfloat) instID; } @@ -341,7 +344,7 @@ run_vp( struct gl_context *ctx, struct tnl_pipeline_stage *stage ) for (i = 0; i < VB->Count; i++) { GLuint attr; - init_machine(ctx, machine); + init_machine(ctx, machine, tnl->CurInstance); #if 0 printf("Input %d: %f, %f, %f, %f\n", i, |